diff --git a/.github/markdown-link-check.json b/.github/markdown-link-check.json index 0f29252b..447e57a1 100644 --- a/.github/markdown-link-check.json +++ b/.github/markdown-link-check.json @@ -9,6 +9,9 @@ }, { "pattern": "^#%EF%B8%8F.*" + }, + { + "pattern": "https://developer.nvidia.com/cuda-downloads" } ], "timeout": "20s", diff --git a/.gitignore b/.gitignore index 7f24f41a..77e8e9fd 100644 --- a/.gitignore +++ b/.gitignore @@ -212,11 +212,15 @@ wandb/ third_party/ # third-party files -**/clarius_cast/include -**/clarius_solum/include +holoscan_i4h/install +holoscan_i4h/build # Raytracing Ultrasound Simulator workflows/robotic_ultrasound/scripts/raysim # Bring Your Own XR openxr/ + +# Ignore Holohub CLI +cmake +utilities diff --git a/README.md b/README.md index 359e6647..1f986130 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,9 @@ ## Table of Contents - [Overview](#overview) -- [Key Features](#key-features) -- [Getting Started](#getting-started) -- [Project Structure](#project-structure) +- [Available Workflows](#available-workflows) +- [Tutorials](#tutorials) +- [Repository Structure](#repository-structure) - [Contributing](#contributing) - [Support](#support) - [License](#license) @@ -24,64 +24,44 @@ ## Overview -**[Nvidia Isaac for Healthcare](https://github.com/isaac-for-healthcare)** is a comprehensive 3-computer solution for healthcare robotics, enabling developers to leverage NVIDIA technologies for: +This repository contains **healthcare robotics workflows** - complete, end-to-end implementations that demonstrate how to build, simulate, and deploy robotic systems for specific medical applications using the [Nvidia Isaac for Healthcare](https://github.com/isaac-for-healthcare) platform. -- Accelerating development and testing of AI algorithms with synthetic data generated in digital twin -- Accelerating development, training and testing of robotic policies in digital twin environments -- Enabling development, training and testing of systems with hardware-in-the-loop (HIL) -- Seamlessly transitioning from simulation to physical systems (Sim2Real) - -## Key Features +### What are Workflows? ![Key features](./docs/source/key_features.jpg) -Our framework provides powerful capabilities for healthcare robotics development: +Workflows are comprehensive reference implementations that showcase the complete development pipeline from simulation to real-world deployment. Each workflow includes digital twin environments, AI model training capabilities, and deployment frameworks for specific healthcare robotics applications. -### Digital Twin & Simulation -- **Digital prototyping** of next-gen healthcare robotic systems, sensors and instruments -- **Synthetic data generation (SDG)** for training AI models, augmented with real data -- **Hardware-in-the-loop (HIL)** testing and evaluation of AI models in digital twin environments +### Available Workflows +This repository currently includes three main workflows: -### AI & Robotics Development -- **Data collection** for training robotic policies through imitation learning -- **XR and haptics-enabled teleoperation** of robotic systems in digital twin -- **GPU-accelerated training** of reinforcement and imitation learning algorithms -- **Augmented dexterity training** for robot-assisted surgery +- **[Robotic Surgery](./workflows/robotic_surgery/README.md)** - Physics-based surgical robot simulation framework with photorealistic rendering for developing autonomous surgical skills. 
Supports da Vinci Research Kit (dVRK), dual-arm configurations, and STAR surgical arms. This workflow enables researchers and medical device companies to train AI models for surgical assistance, validate robot behaviors safely, and accelerate development through GPU-parallelized reinforcement learning. Includes pre-built surgical subtasks like suture needle manipulation and precise reaching tasks. +- **[Robotic Ultrasound](./workflows/robotic_ultrasound/README.md)** - Comprehensive autonomous ultrasound imaging system featuring physics-accurate sensor simulation through GPU-accelerated raytracing technology that models ultrasound wave propagation, tissue interactions, and acoustic properties in real-time. The raytracing ultrasound simulator generates photorealistic B-mode images by simulating acoustic wave physics, enabling synthetic data generation for training AI models without requiring physical ultrasound hardware. Supports multiple AI policies (PI0, GR00T N1), distributed communication via RTI DDS, and Holoscan deployment for clinical applications. This workflow enables medical imaging researchers, ultrasound device manufacturers, and healthcare AI developers to train robotic scanning protocols, validate autonomous imaging algorithms, and accelerate development through GPU-accelerated simulation before clinical deployment. +- **[Telesurgery](./workflows/telesurgery/README.md)** - Real-time remote surgical operations framework supporting both simulated and physical environments with low-latency video streaming, haptic feedback, and distributed control systems. This workflow features H.264/HEVC hardware-accelerated video encoding, RTI DDS communication, cross-platform deployment (x86/AARCH64), and seamless sim-to-real transition. Designed for surgical robotics companies, medical device manufacturers, and telemedicine providers to develop remote surgical capabilities, validate teleoperation systems, and deploy scalable telesurgery solutions across different network conditions. -### Deployment & Training -- **Sim2Real deployment** on Nvidia Holoscan -- **Continuous testing (CT)** of robotic systems through HIL digital twin systems -- **Interactive training** experiences for clinicians/users (pre-op planning, post-op evaluations/insights) +Each workflow provides complete simulation environments, training datasets, pre-trained models, and deployment tools to accelerate your healthcare robotics development. Please see [What's New](./docs/source/whatsnew_0_1_0.md) for details on our milestone releases. 
-## Getting Started - -For everything you need to get started, including detailed tutorials and step-by-step guides, follow these links to learn more about: - -### Workflows -- [Robotic ultrasound](./workflows/robotic_ultrasound/README.md) -- [Robotic surgery](./workflows/robotic_surgery/README.md) -- [Telesurgery](./workflows/telesurgery/README.md) - ### Tutorials + +Get started with our comprehensive tutorials that guide you through key aspects of the framework: - [Bring your own patient](./tutorials/assets/bring_your_own_patient/README.md) - [Bring your own robot](./tutorials/assets/bring_your_own_robot) -- [Sim2Real Transition](./tutorials/sim2real/README.md) +- [Bring your own xr](./tutorials/assets/bring_your_own_xr) -## Project Structure +## Repository Structure ``` i4h-workflows/ -├── docs/ # Documentation and guides -├── workflows/ # Main workflow implementations -│ ├── robotic_surgery/ # Robotic surgery workflow -│ └── robotic_ultrasound/ # Robotic ultrasound workflow +├── docs/ # Documentation and guides ├── tutorials/ # Tutorial materials -│ ├── assets/ # Asset-related tutorials -│ └── sim2real/ # Sim2Real transition tutorials -├── tests/ # Test suite -└── examples/ # Example implementations +│ ├── assets/ # Asset-related tutorials +│ └── sim2real/ # Sim2Real transition tutorials +├── workflows/ # Main workflow implementations +│ ├── robotic_surgery/ # Robotic surgery workflow +│ ├── robotic_ultrasound/ # Robotic ultrasound workflow +│ └── telesurgery/ # Telesurgery workflow ``` ## Contributing diff --git a/docs/source/telesurgery_workflow.jpg b/docs/source/telesurgery_workflow.jpg index 0462ffa7..2d652407 100644 Binary files a/docs/source/telesurgery_workflow.jpg and b/docs/source/telesurgery_workflow.jpg differ diff --git a/holoscan_i4h/CMakeLists.txt b/holoscan_i4h/CMakeLists.txt new file mode 100644 index 00000000..bfbf2be5 --- /dev/null +++ b/holoscan_i4h/CMakeLists.txt @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.20) + +project(holoscan-i4h) + +set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/../install") + +set(LIB_DIR "${CMAKE_BINARY_DIR}/lib") +set(INCLUDE_DIR "${CMAKE_BINARY_DIR}/include") + +file(MAKE_DIRECTORY ${INCLUDE_DIR}) +file(MAKE_DIRECTORY ${LIB_DIR}) + +add_subdirectory(operators) + +message("install: ${CMAKE_INSTALL_PREFIX}") + +install(DIRECTORY ${LIB_DIR}/ DESTINATION lib) +install(DIRECTORY ${INCLUDE_DIR}/ DESTINATION include) diff --git a/holoscan_i4h/README.md b/holoscan_i4h/README.md new file mode 100644 index 00000000..30175846 --- /dev/null +++ b/holoscan_i4h/README.md @@ -0,0 +1,51 @@ +## Holoscan Operators +The Holoscan operators for Isaac-for-Healthcare are located in the `holoscan_i4h/operators/` directory. These operators are modular components designed to be reused across multiple workflows and applications. 
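Stepping back to the new `holoscan_i4h/CMakeLists.txt` superbuild introduced above: a minimal sketch of driving it by hand, mirroring what `tools/env_setup/install_clarius.sh` (added later in this diff) automates. The clone location is a placeholder:

```bash
# Configure, build, and install the operator libraries out of source.
# Per the new CMakeLists.txt, CMAKE_INSTALL_PREFIX resolves to holoscan_i4h/install.
cd /path/to/i4h-workflows/holoscan_i4h
cmake . -B build        # configure; artifacts are staged under build/lib and build/include
cmake --build build     # compile, e.g. the pysolum pybind11 module
cmake --install build   # copy the staged libs/headers into install/lib and install/include
```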
+ +### Using the Operators +To import these operators in your code, ensure the root of the i4h-workflows repository is included in your PYTHONPATH. You can do this by running: + +```bash +export PYTHONPATH=$PYTHONPATH:/path/to/i4h-workflows/ +``` + +You can then import operators as follows: + +```python +from holoscan_i4h.operators.realsense.realsense import RealsenseOp +``` + +### Directory Structure + +The initial directory layout under `holoscan_i4h/` is straightforward and is shown below. +The primary directory of interest is the `operators/` folder where the Holoscan operators are located. + +``` +holoscan_i4h/ +├── operators/ +│ ├── realsense/ +│ ├── clarius_cast/ +│ ├── clarius_solum/ +│ └── ... +├── CMakeLists.txt +├── README.md +└── __init__.py +``` + +### Build and Install Folders + +Some operators — such as `clarius_solum` — may require a build step. This step is typically triggered automatically as part of the workflow setup process. When this occurs, additional `build/` and `install/` directories will appear: + +* `build/`: A temporary directory used by CMake to generate and compile intermediate build artifacts (e.g., object files, build system metadata). +* `install/`: The final output directory for installed components, such as shared libraries (`.so`) and header files (`.h`), ready to be consumed by the operator. + +Example layout after build: + +``` +holoscan_i4h/ +├── operators/ +├── build/ +├── install/ +├── CMakeLists.txt +├── README.md +└── __init__.py +``` diff --git a/holoscan_i4h/__init__.py b/holoscan_i4h/__init__.py new file mode 100644 index 00000000..32c11276 --- /dev/null +++ b/holoscan_i4h/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/holoscan_i4h/operators/CMakeLists.txt b/holoscan_i4h/operators/CMakeLists.txt new file mode 100644 index 00000000..76338f50 --- /dev/null +++ b/holoscan_i4h/operators/CMakeLists.txt @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+add_subdirectory(clarius_cast)
+add_subdirectory(clarius_solum)
diff --git a/holoscan_i4h/operators/__init__.py b/holoscan_i4h/operators/__init__.py
new file mode 100644
index 00000000..32c11276
--- /dev/null
+++ b/holoscan_i4h/operators/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/CMakeLists.txt b/holoscan_i4h/operators/clarius_cast/CMakeLists.txt
similarity index 61%
rename from workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/CMakeLists.txt
rename to holoscan_i4h/operators/clarius_cast/CMakeLists.txt
index df8a9ecc..bffa9b0c 100644
--- a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/CMakeLists.txt
+++ b/holoscan_i4h/operators/clarius_cast/CMakeLists.txt
@@ -1,4 +1,5 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
 # SPDX-License-Identifier: Apache-2.0

 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,10 +17,8 @@ cmake_minimum_required(VERSION 3.20)

 project(clarius_cast)

-set(CMAKE_CXX_STANDARD 17)
-
-set(LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib")
-set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include/cast")
+set(LIB_DIR "${CMAKE_BINARY_DIR}/lib")
+set(INCLUDE_DIR "${CMAKE_BINARY_DIR}/include")

 execute_process(
   COMMAND lsb_release -sr
@@ -28,36 +27,32 @@ execute_process(
 )

 # Download Clarius library and header files
-if(NOT EXISTS "${LIB_DIR}/libcast.so")
-  file(MAKE_DIRECTORY ${LIB_DIR})
+if(NOT EXISTS "${LIB_DIR}/clarius_cast/libcast.so")
+  file(MAKE_DIRECTORY ${LIB_DIR}/clarius_cast)
   file(DOWNLOAD
     "https://github.com/clariusdev/cast/releases/download/v12.0.2/cast-12.0.2-linux.x86_64-gcc_ubuntu_${UBUNTU_VERSION}.zip"
-    "${LIB_DIR}/cast.zip"
+    "${LIB_DIR}/clarius_cast/cast.zip"
   )

   # Unzip the file
   execute_process(
-    COMMAND ${CMAKE_COMMAND} -E tar xzf ${LIB_DIR}/cast.zip
-    WORKING_DIRECTORY ${LIB_DIR}
+    COMMAND unzip cast.zip
+    WORKING_DIRECTORY ${LIB_DIR}/clarius_cast
   )

   # Header files are included in the zip file
-  file(MAKE_DIRECTORY ${INCLUDE_DIR})
-  execute_process(
-    COMMAND mv "${LIB_DIR}/cast" "${CMAKE_CURRENT_SOURCE_DIR}/include"
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-  )
+  execute_process(COMMAND rm -rf "${INCLUDE_DIR}/cast")
+  message("mv ${LIB_DIR}/clarius_cast/cast ${INCLUDE_DIR}/")
+  execute_process(COMMAND mv "${LIB_DIR}/clarius_cast/cast" "${INCLUDE_DIR}/")

   # Use python3.10 library
   execute_process(
-    COMMAND mv "${LIB_DIR}/python310/pyclariuscast.so" "${LIB_DIR}/"
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND mv "${LIB_DIR}/clarius_cast/python310/pyclariuscast.so" "${LIB_DIR}/clarius_cast"
   )

   # Cleanup
   execute_process(
-    COMMAND bash -c "rm -rf ${LIB_DIR}/python* ${LIB_DIR}/cast.zip"
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND bash -c "rm -rf ${LIB_DIR}/clarius_cast/python* 
${LIB_DIR}/clarius_cast/cast.zip" ) endif() diff --git a/holoscan_i4h/operators/clarius_cast/__init__.py b/holoscan_i4h/operators/clarius_cast/__init__.py new file mode 100644 index 00000000..32c11276 --- /dev/null +++ b/holoscan_i4h/operators/clarius_cast/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/holoscan_i4h/operators/clarius_cast/clarius_cast.py b/holoscan_i4h/operators/clarius_cast/clarius_cast.py new file mode 100644 index 00000000..f4bdd266 --- /dev/null +++ b/holoscan_i4h/operators/clarius_cast/clarius_cast.py @@ -0,0 +1,228 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ctypes +import os +import sys +from io import BytesIO + +import holoscan +import numpy as np +import rti.connextdds as dds +from dds.schemas.usp_data import UltraSoundProbeData +from holoscan.core import Operator, OperatorSpec +from PIL import Image + +script_dir = os.path.dirname(os.path.abspath(__file__)) +holoscan_i4h_dir = f"{script_dir}/../../" + +# load the libcast.so shared library +libcast_handle = ctypes.CDLL(f"{holoscan_i4h_dir}/install/lib/clarius_cast/libcast.so", ctypes.RTLD_GLOBAL)._handle +# load the pyclariuscast.so shared library +ctypes.cdll.LoadLibrary(f"{holoscan_i4h_dir}/install/lib/clarius_cast/pyclariuscast.so") + +sys.path.append(f"{holoscan_i4h_dir}/install/lib/clarius_cast") +import pyclariuscast + +# The current image +img = None + + +def processed_image_cb(image, width, height, sz, microns_per_pixel, timestamp, angle, imu): + """ + Callback function that processes a scan-converted image. + + Parameters: + image: The processed image data. + width: The width of the image in pixels. + height: The height of the image in pixels. + sz: Full size of the image in bytes. + microns_per_pixel: Microns per pixel. + timestamp: The timestamp of the image in nanoseconds. + angle: Acquisition angle for volumetric data. + imu: IMU data tagged with the frame. 
+ """ + bpp = sz / (width * height) + + global img + + if bpp == 4: + # Handle RGBA + img = Image.frombytes("RGBA", (width, height), image) + else: + # Handle JPEG + img = Image.open(BytesIO(image)) + + +def raw_image_cb(image, lines, samples, bps, axial, lateral, timestamp, jpg, rf, angle): + """ + Callback function for raw image data. + + Parameters: + image: The raw pre scan-converted image data, uncompressed 8-bit or jpeg compressed. + lines: Number of lines in the data. + samples: Number of samples in the data. + bps: Bits per sample. + axial: Microns per sample. + lateral: Microns per line. + timestamp: The timestamp of the image in nanoseconds. + jpg: JPEG compression size if the data is in JPEG format. + rf: Flag indicating if the image is radiofrequency data. + angle: Acquisition angle for volumetric data. + """ + # Not used in sample app + return + + +def spectrum_image_cb(image, lines, samples, bps, period, micronsPerSample, velocityPerSample, pw): + """ + Callback function for spectrum image data. + + Parameters: + image: The spectral image data. + lines: The number of lines in the spectrum. + samples: The number of samples per line. + bps: Bits per sample. + period: Line repetition period of the spectrum. + micronsPerSample: Microns per sample for an M spectrum. + velocityPerSample: Velocity per sample for a PW spectrum. + pw: Flag that is True for a PW spectrum, False for an M spectrum + """ + # Not used in sample app + return + + +def imu_data_cb(imu): + """ + Callback function for IMU data. + + Parameters: + imu: Inertial data tagged with the frame. + """ + # Not used in sample app + return + + +def freeze_cb(frozen): + """ + Callback function for freeze state changes. + + Parameters: + frozen: The freeze state of the imaging system. + """ + if frozen: + print("\nClarius: Run imaging") + else: + print("\nClarius: Stop imaging") + return + + +def buttons_cb(button, clicks): + """ + Callback function for button presses. + + Parameters: + button: The button that was pressed. + clicks: The number of clicks performed. + """ + print(f"button pressed: {button}, clicks: {clicks}") + return + + +class ClariusCastOp(Operator): + """ + Operator to interface with a Clarius UltraSound Probe using Clarius Cast APIs. + Captures processed image data from a Clarius Probe and publishes it via DDS. + """ + + def __init__(self, fragment, *args, ip, port, domain_id, width, height, topic_out, **kwargs): + """ + Initializes the ClariusCastOp operator. + + Parameters: + fragment: The fragment this operator belongs to. + ip: IP address of the Clarius probe. + port: Port number for Clarius probe. + domain_id: Domain ID for DDS communication. + width: Width of the image in pixels. + height: Height of the image in pixels. + topic_out: The DDS topic to publish ultrasound data. 
+ """ + self.ip = ip + self.port = port + self.domain_id = domain_id + self.width = width + self.height = height + self.topic_out = topic_out + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + """Set up the output for this operator.""" + spec.output("output") + + def start(self): + """Initialize and start the Clarius Cast connection and DDS publisher.""" + # initialize + path = os.path.expanduser("~/") + cast = pyclariuscast.Caster( + processed_image_cb, raw_image_cb, imu_data_cb, spectrum_image_cb, freeze_cb, buttons_cb + ) + self.cast = cast + ret = cast.init(path, self.width, self.height) + + if ret: + print("Initialization succeeded") + # Use JPEG format + JPEG = 2 + ret = cast.setFormat(JPEG) + if ret: + print("Setting format to JPEG") + else: + print("Failed setting format to JPEG") + # unload the shared library before destroying the cast object + ctypes.CDLL("libc.so.6").dlclose(libcast_handle) + cast.destroy() + exit() + + ret = cast.connect(self.ip, self.port, "research") + if ret: + print(f"Connected to {self.ip} on port {self.port}") + else: + print("Connection failed") + # unload the shared library before destroying the cast object + ctypes.CDLL("libc.so.6").dlclose(libcast_handle) + cast.destroy() + exit() + else: + print("Initialization failed") + return + + dp = dds.DomainParticipant(domain_id=self.domain_id) + topic = dds.Topic(dp, self.topic_out, UltraSoundProbeData) + self.writer = dds.DataWriter(dp.implicit_publisher, topic) + + def compute(self, op_input, op_output, context): + """Process the current image and publish it to DDS.""" + global img + + if img is None: + return + + image = np.array(img) + d = UltraSoundProbeData() + d.data = image.tobytes() + self.writer.write(d) + out_message = {"image": holoscan.as_tensor(image)} + op_output.emit(out_message, "output") diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/CMakeLists.txt b/holoscan_i4h/operators/clarius_solum/CMakeLists.txt similarity index 61% rename from workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/CMakeLists.txt rename to holoscan_i4h/operators/clarius_solum/CMakeLists.txt index 91f4f215..c7ebc9b3 100644 --- a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/CMakeLists.txt +++ b/holoscan_i4h/operators/clarius_solum/CMakeLists.txt @@ -21,8 +21,8 @@ set(CMAKE_CXX_STANDARD 17) find_package(pybind11 REQUIRED) pybind11_add_module(pysolum pysolum.cpp) -set(LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib") -set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include/solum") +set(LIB_DIR "${CMAKE_BINARY_DIR}/lib") +set(INCLUDE_DIR "${CMAKE_BINARY_DIR}/include") execute_process( COMMAND lsb_release -sr @@ -31,37 +31,31 @@ execute_process( ) # Download Clarius library and header files -if(NOT EXISTS "${LIB_DIR}/libsolum.so") - file(MAKE_DIRECTORY ${LIB_DIR}) +if(NOT EXISTS "${LIB_DIR}/clarius_solum/libsolum.so") file(DOWNLOAD "https://github.com/clariusdev/solum/releases/download/v12.0.2/solum-12.0.2-linux.x86_64-gcc_ubuntu_${UBUNTU_VERSION}.zip" - "${LIB_DIR}/solum.zip" + "${LIB_DIR}/clarius_solum/solum.zip" ) # Unzip the file execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${LIB_DIR}/solum.zip - WORKING_DIRECTORY ${LIB_DIR} - ) - - # Cleanup - execute_process( - COMMAND rm solum.zip - WORKING_DIRECTORY ${LIB_DIR} + COMMAND unzip solum.zip + WORKING_DIRECTORY ${LIB_DIR}/clarius_solum ) + execute_process(COMMAND rm ${LIB_DIR}/clarius_solum/solum.zip) endif() -if(NOT EXISTS "${INCLUDE_DIR}/solum.h") - 
file(MAKE_DIRECTORY ${INCLUDE_DIR}) - execute_process( - COMMAND mv "${LIB_DIR}/solum" "${CMAKE_CURRENT_SOURCE_DIR}/include" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) +if(NOT EXISTS "${INCLUDE_DIR}/solum/solum.h") + execute_process(COMMAND rm -rf "${INCLUDE_DIR}/solum") + message("mv ${LIB_DIR}/clarius_solum/solum ${INCLUDE_DIR}/") + file(MAKE_DIRECTORY ${INCLUDE_DIR}/solum) + execute_process(COMMAND mv "${LIB_DIR}/clarius_solum/solum" "${INCLUDE_DIR}/") endif() + # Header files -target_include_directories(pysolum PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include/") +target_include_directories(pysolum PRIVATE "${INCLUDE_DIR}/") # Place lib in lib/ folder set_target_properties(pysolum PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/lib" + LIBRARY_OUTPUT_DIRECTORY "${LIB_DIR}/clarius_solum" ) diff --git a/holoscan_i4h/operators/clarius_solum/__init__.py b/holoscan_i4h/operators/clarius_solum/__init__.py new file mode 100644 index 00000000..32c11276 --- /dev/null +++ b/holoscan_i4h/operators/clarius_solum/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/holoscan_i4h/operators/clarius_solum/clarius_solum.py b/holoscan_i4h/operators/clarius_solum/clarius_solum.py new file mode 100644 index 00000000..9bf62a91 --- /dev/null +++ b/holoscan_i4h/operators/clarius_solum/clarius_solum.py @@ -0,0 +1,278 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ctypes +import os +import sys +from io import BytesIO +from time import sleep + +import holoscan +import numpy as np +import rti.connextdds as dds +from dds.schemas.usp_data import UltraSoundProbeData +from holoscan.core import Operator, OperatorSpec +from PIL import Image + +script_dir = os.path.dirname(os.path.abspath(__file__)) +holoscan_i4h_dir = f"{script_dir}/../../" + +# load the libsolum.so shared library +libsolum_handle = ctypes.CDLL(f"{holoscan_i4h_dir}/install/lib/clarius_solum/libsolum.so", ctypes.RTLD_GLOBAL)._handle + +sys.path.append(f"{holoscan_i4h_dir}/install/lib/clarius_solum") +import pysolum + +# Is probe connected +connected = False +# Probe is running +running = False +# The current image +img = None + + +def connect_cb(res, port, status): + """ + Callback function for connection events. 
+ + Parameters: + res: The connection result. + port: UDP port used for streaming. + status: The status message. + """ + print(f"Connection: {res} {status}") + if res == pysolum.CusConnection.ProbeConnected: + global connected + connected = True + print(f"Streaming on port: {port}") + + +def cert_cb(days_valid): + """ + Callback function when the certificate is set. + + Parameters: + days_valid: Number of days the certificate is valid. + """ + print(f"Certificate valid for {days_valid} days.") + + +def power_down_cb(res, tm): + """ + Callback function when the device is powered down. + + Parameters: + res: The power down reason. + tm: Time in seconds until probe powers down, 0 for immediate shutdown. + """ + print(f"Powering down: {res} in {tm} seconds") + + +def processed_image_cb(image, width, height, size, micros_per_pixel, origin_x, origin_y, fps): + """ + Callback function when a new processed image is streamed. + + Parameters: + image: The scan-converted image data. + width: Image width in pixels. + height: Image height in pixels. + size: Full size of the image. + microns_per_pixel: Microns per pixel. + origin_x: Image origin in microns (horizontal axis). + origin_y: Image origin in microns (vertical axis). + fps: Frames per second. + """ + bpp = size / (width * height) + + global img + + if bpp == 4: + img = Image.frombytes("RGBA", (width, height), image).convert("L") + else: + img = Image.open(BytesIO(image)).convert("L") + + +def imu_port_cb(port): + """ + Callback function for new IMU data streaming port. + + Parameters: + port: The new IMU data UDP streaming port. + """ + # Not used in sample code + return + + +def imu_data_cb(pos): + """ + Callback function for new IMU data. + + Parameters: + pos: Positional information data tagged with the image. + """ + # Not used in sample code + return + + +def imaging_cb(state, imaging): + """ + Imaging callback function. + + Parameters: + state: The imaging state. + imaging: 1 if running, 0 if stopped. + """ + if imaging == 0: + print(f"State: {state} imaging: Stopped") + else: + global running + running = True + print(f"State: {state} imaging: Running") + + +def error_cb(code, msg): + """ + Callback function for error events. + + Parameters: + code: Error code associated with the error. + msg: The error message. + """ + print(f"Error: {code}: {msg}") + + +def buttons_cb(button, clicks): + """ + Callback function when a button is pressed. + + Parameters: + button: The button that was pressed. + clicks: Number of clicks performed. + """ + print(f"button pressed: {button}, clicks: {clicks}") + + +class ClariusSolumOp(Operator): + """ + Operator to interface with a Clarius UltraSound Probe using Clarius Solum APIs. + Captures processed image data from a Clarius Probe and publishes it via DDS. + """ + + def __init__(self, fragment, *args, ip, port, cert, model, app, domain_id, width, height, topic_out, **kwargs): + """ + Initializes the ClariusSolumOp operator. + + Parameters: + fragment: The fragment this operator belongs to. + ip: IP address of the Clarius probe. + port: Port number for Clarius probe. + cert: Path to the probe certificate. + model: The Clarius probe model name. + app: The ultrasound application to perform. + domain_id: Domain ID for DDS communication. + width: Width of the image in pixels. + height: Height of the image in pixels. + topic_out: The DDS topic to publish ultrasound data. 
+ """ + self.ip = ip + self.port = port + self.cert = cert + self.model = model + self.app = app + self.domain_id = domain_id + self.width = width + self.height = height + self.topic_out = topic_out + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + """Set up the output for this operator.""" + spec.output("output") + + def stop(self): + """Stops imaging on the Clarius probe.""" + print("Clarius: Stop imaging") + self.solum.stop_imaging() + + def start(self): + """ + Initializes and starts the Clarius Solum API connection. + Establishes a connection, loads the application, and starts imaging. + """ + # initialize + path = os.path.expanduser("~/") + + solum = pysolum.Solum( + connect_cb, + cert_cb, + power_down_cb, + processed_image_cb, + imu_port_cb, + imu_data_cb, + imaging_cb, + buttons_cb, + error_cb, + ) + + self.solum = solum + ret = solum.init(path, self.width, self.height) + + if ret: + global connected + + solum.set_certificate(self.cert) + ret = solum.connect(self.ip, self.port, "research") + + while not connected: + sleep(1) + + if ret: + print(f"Connected to {self.ip} on port {self.port}") + else: + print("Connection failed") + # unload the shared library before destroying the solum object + ctypes.CDLL("libc.so.6").dlclose(libsolum_handle) + solum.destroy() + exit() + + solum.load_application(self.model, self.app) + sleep(5) + solum.run_imaging() + + while not running: + sleep(1) + + else: + print("Initialization failed") + return + + dp = dds.DomainParticipant(domain_id=self.domain_id) + topic = dds.Topic(dp, self.topic_out, UltraSoundProbeData) + self.writer = dds.DataWriter(dp.implicit_publisher, topic) + print(f"Creating writer for topic: {self.domain_id}:{self.topic_out}") + + def compute(self, op_input, op_output, context): + """Processes the current ultrasound image and publishes it via DDS and to the downstream operator.""" + global img + + if img is None: + return + + image = np.array(img) + d = UltraSoundProbeData() + d.data = image.tobytes() + self.writer.write(d) + out_message = {"image": holoscan.as_tensor(image)} + op_output.emit(out_message, "output") diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/pysolum.cpp b/holoscan_i4h/operators/clarius_solum/pysolum.cpp similarity index 100% rename from workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/pysolum.cpp rename to holoscan_i4h/operators/clarius_solum/pysolum.cpp diff --git a/holoscan_i4h/operators/no_op/__init__.py b/holoscan_i4h/operators/no_op/__init__.py new file mode 100644 index 00000000..32c11276 --- /dev/null +++ b/holoscan_i4h/operators/no_op/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/holoscan_i4h/operators/no_op/no_op.py b/holoscan_i4h/operators/no_op/no_op.py new file mode 100644 index 00000000..1f148abb --- /dev/null +++ b/holoscan_i4h/operators/no_op/no_op.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from holoscan.core import Operator +from holoscan.core._core import OperatorSpec + + +class NoOp(Operator): + """A sink operator that takes input and discards them.""" + + def __init__(self, fragment, input_ports=None, *args, **kwargs): + self.input_ports = input_ports or ["input"] + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + for port in self.input_ports: + spec.input(port) + + def compute(self, op_input, op_output, context): + for port in self.input_ports: + op_input.receive(port) diff --git a/holoscan_i4h/operators/realsense/__init__.py b/holoscan_i4h/operators/realsense/__init__.py new file mode 100644 index 00000000..32c11276 --- /dev/null +++ b/holoscan_i4h/operators/realsense/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/holoscan_i4h/operators/realsense/realsense.py b/holoscan_i4h/operators/realsense/realsense.py new file mode 100644 index 00000000..766a4747 --- /dev/null +++ b/holoscan_i4h/operators/realsense/realsense.py @@ -0,0 +1,132 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import holoscan +import numpy as np +import pyrealsense2 as rs +import rti.connextdds as dds +from dds.schemas.camera_info import CameraInfo +from holoscan.core import Operator +from holoscan.core._core import OperatorSpec + + +class RealsenseOp(Operator): + """ + Operator to interface with an Intel RealSense camera. + Captures RGB and depth frames, optionally publishing them via DDS. + """ + + def __init__( + self, + fragment, + *args, + domain_id, + width, + height, + topic_rgb, + topic_depth, + device_idx, + framerate, + show_holoviz, + **kwargs, + ): + """ + Initialize the RealSense operator. + + Parameters: + - domain_id (int): DDS domain ID. + - width (int): Width of the camera stream. + - height (int): Height of the camera stream. + - topic_rgb (str): DDS topic for RGB frames. + - topic_depth (str): DDS topic for depth frames. + - device_idx (int): Camera device index. + - framerate (int): Frame rate for the camera stream. + - show_holoviz (bool): Whether to display frames using Holoviz. + """ + self.domain_id = domain_id + self.width = width + self.height = height + self.topic_rgb = topic_rgb + self.topic_depth = topic_depth + self.device_idx = device_idx + self.framerate = framerate + self.show_holoviz = show_holoviz + super().__init__(fragment, *args, **kwargs) + + self.pipeline = rs.pipeline() + self.rgb_writer = None + self.depth_writer = None + + def setup(self, spec: OperatorSpec): + """Define the output ports for the operator.""" + if self.topic_rgb: + spec.output("color") + if self.topic_depth and not self.show_holoviz: + spec.output("depth") + + def start(self): + """ + Configure and start the RealSense camera pipeline. + Sets up DDS writers if topics are provided. + """ + config = rs.config() + context = rs.context() + for device in context.query_devices(): + print(f"+++ Available device: {device}") + + if self.device_idx is not None: + if self.device_idx < len(context.query_devices()): + config.enable_device(context.query_devices()[self.device_idx].get_info(rs.camera_info.serial_number)) + else: + print(f"Ignoring input device_idx: {self.device_idx}") + + dp = dds.DomainParticipant(domain_id=self.domain_id) + if self.topic_rgb: + print("Enabling color...") + self.rgb_writer = dds.DataWriter(dp.implicit_publisher, dds.Topic(dp, self.topic_rgb, CameraInfo)) + config.enable_stream( + rs.stream.color, + width=self.width, + height=self.height, + format=rs.format.rgba8, + framerate=self.framerate, + ) + if self.topic_depth: + print("Enabling depth...") + config.enable_stream( + rs.stream.depth, + width=self.width, + height=self.height, + format=rs.format.z16, + framerate=self.framerate, + ) + self.depth_writer = dds.DataWriter(dp.implicit_publisher, dds.Topic(dp, self.topic_depth, CameraInfo)) + self.pipeline.start(config) + + def compute(self, op_input, op_output, context): + """ + Capture frames from the RealSense camera and publish them via DDS. 
+ """ + frames = self.pipeline.wait_for_frames() + color = None + if self.rgb_writer: + color = np.asanyarray(frames.get_color_frame().get_data()) + self.rgb_writer.write(CameraInfo(data=color.tobytes(), width=self.width, height=self.height)) + op_output.emit({"color": holoscan.as_tensor(color)} if self.show_holoviz else True, "color") + if self.depth_writer: + depth = np.asanyarray(frames.get_depth_frame().get_data()).astype(np.float32) / 1000.0 + self.depth_writer.write(CameraInfo(data=depth.tobytes(), width=self.width, height=self.height)) + if not self.show_holoviz: + op_output.emit({"depth": holoscan.as_tensor(depth)}, "depth") diff --git a/tools/env_setup/bash_utils.sh b/tools/env_setup/bash_utils.sh index 9b944c56..05a2da49 100644 --- a/tools/env_setup/bash_utils.sh +++ b/tools/env_setup/bash_utils.sh @@ -27,6 +27,11 @@ check_conda_env() { } check_nvidia_gpu() { + # skip check if building docker image + if [ "$BUILD_DOCKER_IMAGE" = "true" ]; then + return + fi + if ! nvidia-smi &> /dev/null; then echo "Error: NVIDIA GPU not found or driver not installed" exit 1 diff --git a/tools/env_setup/install_clarius.sh b/tools/env_setup/install_clarius.sh new file mode 100755 index 00000000..0145ec50 --- /dev/null +++ b/tools/env_setup/install_clarius.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +HOLOSCAN_I4H_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../../holoscan_i4h && pwd)" + +( + cd $HOLOSCAN_I4H_ROOT + cmake . -B build && cmake --build build && cmake --install build + echo "Successfully built Clarius libs" +) diff --git a/tools/env_setup/install_gr00tn1.sh b/tools/env_setup/install_gr00tn1.sh index 323b5aa4..1c51e3c7 100644 --- a/tools/env_setup/install_gr00tn1.sh +++ b/tools/env_setup/install_gr00tn1.sh @@ -37,6 +37,9 @@ else # Ensure parent third_party dir exists mkdir -p "$PROJECT_ROOT/third_party" git clone https://github.com/NVIDIA/Isaac-GR00T "$GR00T_DIR" + pushd "$GR00T_DIR" + git checkout n1-release + popd fi pushd "$GR00T_DIR" @@ -49,7 +52,8 @@ else echo "pyav already updated to av or not found in Isaac-GR00T's pyproject.toml." fi -pip install -e . +pip install -e .[base] +pip install transformers==4.45.2 popd pip install --no-build-isolation flash-attn==2.7.1.post4 diff --git a/tools/env_setup/install_isaac.sh b/tools/env_setup/install_isaac.sh index 00e7783d..50359dac 100755 --- a/tools/env_setup/install_isaac.sh +++ b/tools/env_setup/install_isaac.sh @@ -35,7 +35,7 @@ mkdir -p "$PROJECT_ROOT/third_party" # ---- Install IsaacSim ---- echo "Installing IsaacSim..." 
pip install 'isaacsim[all,extscache]==4.5.0' \ - git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@v0.2.0rc1 \ + git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@v0.2.0rc2 \ --extra-index-url https://pypi.nvidia.com ISAACLAB_DIR="$PROJECT_ROOT/third_party/IsaacLab" @@ -85,4 +85,7 @@ echo "Installing IsaacLab ..." yes Yes | ./isaaclab.sh --install popd +# ---- Apply dependency package version patch ---- +pip install 'warp-lang==1.7.2' + echo "IsaacSim and dependencies installed successfully!" diff --git a/tools/env_setup_robot_us.sh b/tools/env_setup_robot_us.sh index 72bbe403..d15cfcd3 100755 --- a/tools/env_setup_robot_us.sh +++ b/tools/env_setup_robot_us.sh @@ -71,9 +71,13 @@ ensure_fresh_third_party_dir # ---- Install build tools (Common) ---- echo "Installing build tools..." if [ "$EUID" -ne 0 ]; then - sudo apt-get install -y git cmake build-essential pybind11-dev libxcb-cursor0 + sudo apt-get install -y cmake + sudo apt-get update + sudo apt-get install -y git build-essential pybind11-dev libxcb-cursor0 else - apt-get install -y git cmake build-essential pybind11-dev libxcb-cursor0 + apt-get install -y cmake + apt-get update + apt-get install -y git build-essential pybind11-dev libxcb-cursor0 fi @@ -114,6 +118,11 @@ if [[ "$INSTALL_WITH_POLICY" == "gr00tn1" ]]; then fi +# ---- Installing Clarius libs ---- +echo "Installing Clarius libs..." +bash $PROJECT_ROOT/tools/env_setup/install_clarius.sh + + # for holoscan and cosmos transfer1, we need to install the following conda packages: conda install -c conda-forge ninja libgl ffmpeg pybind11 gcc=12.4.0 gxx=12.4.0 libstdcxx-ng=12.4.0 -y diff --git a/tools/install_deps.py b/tools/install_deps.py index 7d5a7432..2e566eeb 100644 --- a/tools/install_deps.py +++ b/tools/install_deps.py @@ -21,7 +21,7 @@ def install_dependencies(workflow_name: str = "robotic_ultrasound"): """Install project dependencies from requirements.txt""" - if workflow_name not in ["robotic_ultrasound", "robotic_surgery"]: + if workflow_name not in ["robotic_ultrasound", "robotic_surgery", "none"]: raise ValueError(f"Invalid workflow name: {workflow_name}") try: diff --git a/tools/run_all_tests.py b/tools/run_all_tests.py index 3f8bdea8..cc332469 100644 --- a/tools/run_all_tests.py +++ b/tools/run_all_tests.py @@ -146,6 +146,9 @@ def _run_test_process(cmd, env, test_path, timeout=1200): if return_code == 0: print(f"\nTEST PASSED in {elapsed_time} seconds") return True + elif return_code == -6: + print("\n The process crashes at shutdown because of native async code that does not finalize safely.") + return True else: print(f"\nTEST FAILED with return code {return_code} after {elapsed_time} seconds") @@ -280,26 +283,31 @@ def run_integration_tests(workflow_name, timeout=1200): print(f"Test timeout: {timeout} seconds ({timeout//60} minutes)") project_root = f"workflows/{workflow_name}" - default_license_file = os.path.join(os.getcwd(), project_root, "scripts", "dds", "rti_license.dat") - os.environ["RTI_LICENSE_FILE"] = os.environ.get("RTI_LICENSE_FILE", default_license_file) - all_tests_passed = True - tests_dir = os.path.join(project_root, "tests") - print(f"Looking for tests in {tests_dir}") - tests = get_tests(tests_dir, pattern="test_integration_*.py") - env = _setup_test_env(project_root, tests_dir) - - for test_path in tests: - cmd = [ - sys.executable, - "-m", - "unittest", - test_path, - ] - if "cosmos_transfer1" in test_path: - env = _setup_test_cosmos_transfer1_env(os.getcwd(), project_root, 
tests_dir)
-
-        if not _run_test_process(cmd, env, test_path):
-            all_tests_passed = False
+    try:
+        default_license_file = os.path.join(os.getcwd(), project_root, "scripts", "dds", "rti_license.dat")
+        os.environ["RTI_LICENSE_FILE"] = os.environ.get("RTI_LICENSE_FILE", default_license_file)
+        all_tests_passed = True
+        tests_dir = os.path.join(project_root, "tests")
+        print(f"Looking for tests in {tests_dir}")
+        tests = get_tests(tests_dir, pattern="test_integration_*.py")
+        env = _setup_test_env(project_root, tests_dir)
+
+        for test_path in tests:
+            cmd = [
+                sys.executable,
+                "-m",
+                "unittest",
+                test_path,
+            ]
+            if "cosmos_transfer1" in test_path:
+                env = _setup_test_cosmos_transfer1_env(os.getcwd(), project_root, tests_dir)
+
+            if not _run_test_process(cmd, env, test_path):
+                all_tests_passed = False
+    except Exception as e:
+        print(f"Error running integration tests: {e}")
+        print(traceback.format_exc())
+        return 1

     return 0 if all_tests_passed else 1
diff --git a/tutorials/assets/README.md b/tutorials/assets/README.md
index beb58ed8..1b2ba1fe 100644
--- a/tutorials/assets/README.md
+++ b/tutorials/assets/README.md
@@ -13,3 +13,6 @@ This directory contains tutorials for integrating and teleoperating your own rob

 - [Replace Franka Hand with Ultrasound Probe](./bring_your_own_robot/replace_franka_hand_with_ultrasound_probe.md)
   Step-by-step guide to replacing the Franka robot’s hand with an ultrasound probe in Isaac Sim, including CAD/URDF conversion, asset import, and joint setup for custom robotic ultrasound simulation.
+
+- [Bring Your Own Head-Mounted Display with OpenXR](./bring_your_own_xr/)
+  Comprehensive guide for using OpenXR-enabled mixed reality devices (like Apple Vision Pro) for immersive robotic teleoperation in Isaac Lab. Learn to set up the NVIDIA CloudXR runtime, configure hand tracking controls, and enable simulated robotic teleoperation.
diff --git a/tutorials/assets/bring_your_own_patient/README.md b/tutorials/assets/bring_your_own_patient/README.md
index be0891d8..8994760f 100644
--- a/tutorials/assets/bring_your_own_patient/README.md
+++ b/tutorials/assets/bring_your_own_patient/README.md
@@ -31,7 +31,7 @@ Currently, the robotic ultrasound workflow uses the assets in [I4H Asset Catalog

 If you generate the mesh and USD files from the different source (e.g. mesh from CT scan but USD from public human body 3D model), you need to align the meshes to achieve realistic simulation results.

-- USD model follows the [USD convention](https://docs.omniverse.nvidia.com/isaacsim/latest/reference_conventions.html#usd-axes).
+- USD model follows the [USD convention](https://docs.isaacsim.omniverse.nvidia.com/4.5.0/reference_material/reference_conventions.html#usd-axes).
- It is the `organ` frame in the [environment configuration file](../../../workflows/robotic_ultrasound/scripts/simulation/exts/robotic_us_ext/robotic_us_ext/tasks/ultrasound/approach/config/franka/franka_manager_rl_env_cfg.py) - Mesh object for the organs diff --git a/tutorials/assets/bring_your_own_robot/Virtual_Incision_MIRA/README.md b/tutorials/assets/bring_your_own_robot/Virtual_Incision_MIRA/README.md index 2574b546..0a2e871b 100644 --- a/tutorials/assets/bring_your_own_robot/Virtual_Incision_MIRA/README.md +++ b/tutorials/assets/bring_your_own_robot/Virtual_Incision_MIRA/README.md @@ -8,6 +8,8 @@ This tutorial shows how to teleoperate the [Virtual Incision MIRA](https://virtu ## Environment Setup +**Note**: The setup process takes approximately 30-40 minutes to complete, depending on your system and network connection. + This tutorial requires the following dependencies: - [IsaacSim 4.5.0](https://docs.isaacsim.omniverse.nvidia.com/4.5.0/index.html) - [IsaacLab 2.1.0](https://isaac-sim.github.io/IsaacLab/v2.1.0/index.html) diff --git a/tutorials/assets/bring_your_own_robot/replace_franka_hand_with_ultrasound_probe.md b/tutorials/assets/bring_your_own_robot/replace_franka_hand_with_ultrasound_probe.md index 47621f8c..f5c5a316 100644 --- a/tutorials/assets/bring_your_own_robot/replace_franka_hand_with_ultrasound_probe.md +++ b/tutorials/assets/bring_your_own_robot/replace_franka_hand_with_ultrasound_probe.md @@ -48,7 +48,7 @@ To convert URDF to USD for use in Isaac Sim: - Select your URDF file and configure import settings - Click "Import" to generate the USD representation -For detailed steps, refer to the [official documentation on importing URDF](https://docs.isaacsim.omniverse.nvidia.com/latest/robot_setup/import_urdf.html). +For detailed steps, refer to the [official documentation on importing URDF](https://docs.isaacsim.omniverse.nvidia.com/4.5.0/robot_setup/import_urdf.html). ## Replacing Franka Hand with Ultrasound Probe @@ -106,9 +106,9 @@ For detailed steps, refer to the [official documentation on importing URDF](http ## Additional Resources -- [URDF to USD Conversion Documentation](https://docs.isaacsim.omniverse.nvidia.com/latest/robot_setup/import_urdf.html#tutorial-import-urdf) +- [URDF to USD Conversion Documentation](https://docs.isaacsim.omniverse.nvidia.com/4.5.0/robot_setup/import_urdf.html#tutorial-import-urdf) - [Omniverse CAD Extension Documentation](https://docs.omniverse.nvidia.com/extensions/latest/ext_cad-converter/manual.html) -- [Omniverse Importers and Exporters Documentation](https://docs.isaacsim.omniverse.nvidia.com/latest/robot_setup/importers_exporters.html#) +- [Omniverse Importers and Exporters Documentation](https://docs.isaacsim.omniverse.nvidia.com/4.5.0/robot_setup/importers_exporters.html#) - [Video Tutorial: Training a Robot from Scratch in Simulation, from URDF to OpenUSD](https://www.youtube.com/live/_HMk7I-vSBQ) ## Support diff --git a/tutorials/assets/bring_your_own_xr/README.md b/tutorials/assets/bring_your_own_xr/README.md index 9c4e086b..7486fca0 100644 --- a/tutorials/assets/bring_your_own_xr/README.md +++ b/tutorials/assets/bring_your_own_xr/README.md @@ -178,11 +178,28 @@ to any device if no device is connected. Click "Stop AR" to exit the OpenXR sess **Note**: XR teleoperation updates are currently applied relative to the probe head. 
The coordinate system from the user's tracked right hand is applied along the local probe axes as pictured below: - **Hand Tracking Left/Right** -> Green axis -- **Hand Tracking Up/Down** -> Red axis -- **Hand Tracking Towards/Away** -> Blue axis +- **Hand Tracking Up/Down** -> Blue axis +- **Hand Tracking Towards/Away** -> Red axis ![Coordinate frame as viewed along the robotic ultrasound probe](resources/probe_frame.png) +This mapping is achieved by a `transform_matrix` defined in the `teleop_se3_agent.py` script. The hand tracking system provides coordinates in its own frame, which needs to be converted to the robot's end-effector (probe) frame. The `transform_matrix`: +```python +torch.tensor( + [[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], + dtype=torch.float32, + device=env.unwrapped.device, +) +``` +This matrix effectively flips the X and Z axes of the incoming hand tracking data. Specifically: +- The X-axis (Up/Down in hand tracking) is inverted. +- The Y-axis (Left/Right in hand tracking) remains unchanged. +- The Z-axis (Towards/Away in hand tracking) is inverted. + +This transformation ensures that the user's hand movements are intuitively mapped to the robot's actions in the simulation environment. +For a clearer understanding of how the `transform_matrix` maps hand tracking coordinates to the probe frame, please refer to the coordinate frame visualization below: +![Coordinate frame](resources/probe_frame_room_view.png) + ## Apple Vision Pro Run the following command on your local workstation to view your IP address: diff --git a/tutorials/assets/bring_your_own_xr/resources/probe_frame_room_view.png b/tutorials/assets/bring_your_own_xr/resources/probe_frame_room_view.png new file mode 100644 index 00000000..35c7d2f9 Binary files /dev/null and b/tutorials/assets/bring_your_own_xr/resources/probe_frame_room_view.png differ diff --git a/workflows/robotic_surgery/README.md b/workflows/robotic_surgery/README.md index f2d554e1..dcfe94d4 100644 --- a/workflows/robotic_surgery/README.md +++ b/workflows/robotic_surgery/README.md @@ -2,9 +2,11 @@ ![Robotic Surgery Workflow](../../docs/source/robotic_surgery_workflow.jpg) +The Robotic Surgery Workflow is a comprehensive solution designed for healthcare professionals and researchers working in the field of robotic-assisted surgery. This workflow provides a robust framework for simulating, training, and analyzing robotic surgical procedures in a virtual environment. It leverages NVIDIA's ray tracing capabilities to create highly realistic surgical simulations, enabling surgeons to practice complex procedures, researchers to develop new surgical techniques, and medical institutions to enhance their training programs. By offering a safe, controlled environment for surgical practice and research, this workflow helps improve surgical outcomes, reduce training costs, and advance the field of robotic surgery. + + ## Table of Contents - [System Requirements](#system-requirements) -- [Quick Start](#quick-start) - [Environment Setup](#environment-setup) - [Prerequisites](#prerequisites) - [Installation Steps](#installation-steps) @@ -23,33 +25,13 @@ - 16GB RAM minimum ### Software Requirements -- NVIDIA Driver Version >= 555 +- [NVIDIA Driver Version >= 555](https://www.nvidia.com/en-us/drivers/) - Python 3.10 -## Quick Start - -1. Install NVIDIA driver (>= 555) -2. Create and activate conda environment: - ```bash - conda create -n robotic_surgery python=3.10 -y - conda activate robotic_surgery - ``` -3. 
Run the setup script: - ```bash - cd - bash tools/env_setup_robot_surgery.sh - ``` -4. Download assets: - ```bash - i4h-asset-retrieve - ``` -5. Set environment variables: - ```bash - export PYTHONPATH=/workflows/robotic_surgery/scripts - ``` - ## Environment Setup +**Note**: The setup process takes approximately 30-40 minutes to complete, depending on your system and network connection. + ### Prerequisites The robotic surgery workflow is built on the following dependencies: @@ -65,7 +47,12 @@ Install or upgrade to the latest NVIDIA driver from [NVIDIA website](https://www #### 2. Install Dependencies +##### Install Conda + +[Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/main) is recommended. + ##### Create Conda Environment + ```bash # Create a new conda environment conda create -n robotic_surgery python=3.10 -y @@ -73,6 +60,12 @@ conda create -n robotic_surgery python=3.10 -y conda activate robotic_surgery ``` +##### Clone repository + ```bash + git clone https://github.com/isaac-for-healthcare/i4h-workflows.git + cd i4h-workflows + ``` + ##### Install All Dependencies The main script `tools/env_setup_robot_surgery.sh` installs all necessary dependencies: @@ -89,12 +82,14 @@ bash tools/env_setup_robot_surgery.sh ### Asset Setup +**Note**: The assets can be automatically retrieved when running the workflows. Optionally, you can also download all the assets in advance. Please note that the assets are approximately 65 GB and may take some time to download depending on your internet connection. + Download the required assets using: ```bash i4h-asset-retrieve ``` -This will download assets to `~/.cache/i4h-assets/`. For more details, refer to the [Asset Container Helper](https://github.com/isaac-for-healthcare/i4h-asset-catalog/blob/v0.2.0rc1/docs/catalog_helper.md). +This will download assets to `~/.cache/i4h-assets/`. For more details, refer to the [Asset Container Helper](https://github.com/isaac-for-healthcare/i4h-asset-catalog/blob/v0.2.0rc2/docs/catalog_helper.md). ### Environment Variables diff --git a/workflows/robotic_surgery/docker/Dockerfile b/workflows/robotic_surgery/docker/Dockerfile index 7edd5b60..f6f29d57 100644 --- a/workflows/robotic_surgery/docker/Dockerfile +++ b/workflows/robotic_surgery/docker/Dockerfile @@ -15,42 +15,69 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM nvcr.io/nvidia/isaac-lab:2.1.0 - -# Apply patches to IsaacLab -COPY tools/env_setup/patches/events_random_texture.patch /tmp/ -COPY tools/env_setup/patches/from_files_semantic_tags.patch /tmp/ - -RUN cd /workspace/isaaclab && \ - patch -p1 < /tmp/events_random_texture.patch && \ - patch -p1 < /tmp/from_files_semantic_tags.patch && \ - rm /tmp/events_random_texture.patch /tmp/from_files_semantic_tags.patch - - -# This line enables SSH access for the following RUN command -RUN apt-get update && apt-get install -y git openssh-client - -# Add GitHub to known hosts to avoid "host key verification failed" -RUN mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts - -WORKDIR /workspace/robotic_surgery/scripts - -# Fix livestream public endpoint address issue in 2.0.2/2.1.0 -RUN sed -i '/--\/app\/livestream\/publicEndpointAddress=/d' /workspace/isaaclab/source/isaaclab/isaaclab/app/app_launcher.py - -COPY workflows/robotic_surgery/scripts . 
-
-COPY tools/env_setup/install_robotic_surgery_extensions.sh /tmp/install_robotic_surgery_extensions.sh
-
-ENV PYTHONPATH=/workspace/robotic_surgery/scripts
-
-ENV PYTHON_EXECUTABLE=/workspace/isaaclab/_isaac_sim/python.sh
-
-RUN --mount=type=ssh $PYTHON_EXECUTABLE -m pip install --no-deps git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@v0.2.0rc1
-
-# Install the robotic surgery extensions
-# Extension installation also needs to know which python to use via the PYTHON_EXECUTABLE environment variable
-RUN /tmp/install_robotic_surgery_extensions.sh simulation/exts
-
-# Clean up the install script
-RUN rm /tmp/install_robotic_surgery_extensions.sh
+FROM nvidia/cuda:12.8.1-devel-ubuntu24.04
+
+SHELL ["/bin/bash", "-c"]
+
+# Install all packages in a single layer to avoid caching issues
+RUN apt-get update && \
+    apt-get install -y \
+    wget \
+    curl \
+    jq \
+    vim \
+    git \
+    xvfb \
+    build-essential \
+    cmake \
+    vulkan-tools \
+    unzip \
+    lsb-release \
+    libglib2.0-0 \
+    libdbus-1-3 \
+    libopengl0 \
+    libxcb-keysyms1 \
+    libglu1-mesa && \
+    rm -rf /var/lib/apt/lists/*
+
+# Add github.com to the list of known hosts for git clone over SSH
+# (alternatively, COPY a pre-fetched known_hosts file, as in the commented lines below)
+RUN mkdir -p ~/.ssh && \
+    curl --silent https://api.github.com/meta \
+    | jq --raw-output '"github.com "+.ssh_keys[]' >> ~/.ssh/known_hosts
+#RUN mkdir -p ~/.ssh
+#COPY ./github /root/.ssh/known_hosts
+
+# all devices should be visible
+ENV NVIDIA_VISIBLE_DEVICES=all
+# set 'compute' driver cap to use CUDA
+# set 'video' driver cap to use the video encoder
+# set 'graphics' driver cap to use OpenGL/EGL
+# set 'display' to allow use of virtual display
+ENV NVIDIA_DRIVER_CAPABILITIES=graphics,video,compute,utility,display
+
+# Install everything in a conda environment
+
+WORKDIR /workspace/i4h-workflows
+
+COPY tools /workspace/i4h-workflows/tools
+COPY workflows /workspace/i4h-workflows/workflows
+
+# Install Miniconda and create the robotic_surgery environment
+RUN --mount=type=ssh \
+    mkdir -p ~/miniconda3 && \
+    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh && \
+    bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 && \
+    rm ~/miniconda3/miniconda.sh && \
+    source ~/miniconda3/bin/activate && \
+    conda init --all && \
+    conda create -n robotic_surgery python=3.10.14 -y
+
+RUN --mount=type=ssh \
+    source ~/miniconda3/bin/activate && \
+    conda activate robotic_surgery && \
+    cd /workspace/i4h-workflows && \
+    BUILD_DOCKER_IMAGE=true bash tools/env_setup_robot_surgery.sh
+
+ENV PYTHONPATH=/workspace/i4h-workflows/workflows/robotic_surgery/scripts
+ENV RTI_LICENSE_FILE=/root/rti/rti_license.dat
diff --git a/workflows/robotic_surgery/docker/README.md b/workflows/robotic_surgery/docker/README.md
index d4439dde..e81e2c6a 100644
--- a/workflows/robotic_surgery/docker/README.md
+++ b/workflows/robotic_surgery/docker/README.md
@@ -1,27 +1,48 @@
-# Simulation LiveStream from Remote Docker Container
+# Robotic Surgery Docker Container
 
-This document describes how to run the simulation in a remote docker container and stream the simulation to a local machine.
+This guide provides instructions for running robotic surgery simulations using Docker containers with Isaac Sim.
## Prerequisites -Please refer to [Livestream Clients Guide in Isaac Sim](https://docs.isaacsim.omniverse.nvidia.com/latest/installation/manual_livestream_clients.html#isaac-sim-short-webrtc-streaming-client) and download [Isaac Sim WebRTC Streaming Client](https://docs.isaacsim.omniverse.nvidia.com/latest/installation/download.html#isaac-sim-latest-release) +- **Docker Engine** +- **NVIDIA Docker Runtime** +- **Git** with SSH key access to private repositories +- **X11 forwarding** support (for GUI mode) -## Build Docker Image +## Build the Docker Image -To build the docker image, you will need to set up the SSH agent and add your SSH key to the agent, so that the docker build process can access the private repository. +```sh +# Clone the repository +git clone https://github.com/isaac-for-healthcare/i4h-workflows.git +cd i4h-workflows -```bash +# Enable BuildKit export DOCKER_BUILDKIT=1 + +# Set up SSH agent for private repository access eval "$(ssh-agent -s)" -ssh-add ~/.ssh/id_ed25519 # Replace with your SSH key +ssh-add ~/.ssh/id_ed25519 # Replace with your SSH key path + +# Build the Docker image with SSH forwarding docker build --ssh default -f workflows/robotic_surgery/docker/Dockerfile -t robotic_surgery:latest . ``` -## Run the Container +## Running the Container -```bash -docker run --name isaac-sim --entrypoint bash -it --runtime=nvidia --gpus all -e "ACCEPT_EULA=Y" --rm --network=host \ +```sh +# Allow Docker to access X11 display +xhost +local:docker + +# Run container with GUI support +docker run --name isaac-sim -it --gpus all --rm \ + --network=host \ + --runtime=nvidia \ + --entrypoint=bash \ + -e DISPLAY=$DISPLAY \ + -e "OMNI_KIT_ACCEPT_EULA=Y" \ + -e "ACCEPT_EULA=Y" \ -e "PRIVACY_CONSENT=Y" \ + -v /tmp/.X11-unix:/tmp/.X11-unix \ -v ~/docker/isaac-sim/cache/kit:/isaac-sim/kit/cache:rw \ -v ~/docker/isaac-sim/cache/ov:/root/.cache/ov:rw \ -v ~/docker/isaac-sim/cache/pip:/root/.cache/pip:rw \ @@ -34,19 +55,61 @@ docker run --name isaac-sim --entrypoint bash -it --runtime=nvidia --gpus all -e robotic_surgery:latest ``` -### Run the Simulation +## Running the Simulation + +### 1. Interactive GUI Mode with X11 Forwarding + +For interactive development and debugging: + +```bash +# Inside the container +conda activate robotic_surgery + +# Run simulation with GUI +python workflows/robotic_surgery/scripts/simulation/scripts/environments/state_machine/reach_psm_sm.py +``` + +Please refer to the [Simulation README](../scripts/simulation/README.md) for more details. + +### 2. Headless Streaming Mode -In the container, run the simulation with `--livestream 2` to stream the simulation to the local machine. For example, to run the `reach_psm_sm.py` script, run the following command: +For remote access: ```bash -docker exec -it isaac-sim bash -# Inside the container, run the simulation -python simulation/scripts/environments/state_machine/reach_psm_sm.py --livestream 2 +# Inside the container +conda activate robotic_surgery + +# Run simulation with WebRTC streaming +python workflows/robotic_surgery/scripts/simulation/scripts/environments/state_machine/reach_psm_sm.py --livestream 2 ``` -Please refer to the [simulation README](../scripts/simulation/README.md) for other examples. +#### WebRTC Client Setup + +1. **Download the Isaac Sim WebRTC Client**: + - Visit the [Isaac Sim Download Page](https://docs.isaacsim.omniverse.nvidia.com/4.5.0/installation/download.html) + - Download the **Isaac Sim WebRTC Streaming Client** +2. 
**Configure Connection**:
+   - Open the WebRTC client
+   - Enter the server IP address (container host IP)
+   - Wait for the simulation to initialize (look for "Resetting the state machine" message)
+   - Click "Connect"
 
-### Open the WebRTC Client
+3. **Network Requirements**:
+   - Ensure ports are accessible: `TCP/UDP 47995-48012`, `TCP/UDP 49000-49007`, `TCP 49100`
+   - For remote access, configure firewall rules accordingly
 
-Wait for the simulation to start (you should see the message "Resetting the state machine" when the simulation is ready), and open the WebRTC client to view the simulation.
+## Troubleshooting
+
+### Common Issues
+
+**Display Issues**:
+```bash
+# Reset X11 permissions
+xhost -local:docker
+xhost +local:docker
+
+# Check DISPLAY variable
+echo $DISPLAY
+```
+The `DISPLAY` variable must be set on the host for interactive GUI mode to work.
diff --git a/workflows/robotic_ultrasound/README.md b/workflows/robotic_ultrasound/README.md
index 3777d61c..c0391e98 100644
--- a/workflows/robotic_ultrasound/README.md
+++ b/workflows/robotic_ultrasound/README.md
@@ -2,6 +2,18 @@
 ![Robotic Ultrasound Workflow](../../docs/source/robotic_us_workflow.jpg)
 
+The Robotic Ultrasound Workflow is a comprehensive solution designed for healthcare professionals, medical imaging researchers, and ultrasound device manufacturers working in the field of autonomous ultrasound imaging. This workflow provides a robust framework for simulating, training, and deploying robotic ultrasound systems using NVIDIA's advanced ray tracing technology. By offering a physics-accurate ultrasound simulation environment, it enables researchers to develop and validate autonomous scanning protocols, train AI models for image interpretation, and accelerate the development of next-generation ultrasound systems without requiring physical hardware.
+
+The workflow features a state-of-the-art ultrasound sensor simulation that leverages GPU-accelerated ray tracing to model the complex physics of ultrasound wave propagation. The simulator accurately represents:
+- Acoustic wave propagation through different tissue types
+- Tissue-specific acoustic properties (impedance, attenuation, scattering)
+- Real-time B-mode image generation based on echo signals
+- Dynamic tissue deformation and movement
+- Multi-frequency transducer capabilities
+
+This physics-based approach enables the generation of highly realistic synthetic ultrasound images that closely match real-world data, making it ideal for training AI models and validating autonomous scanning algorithms. The workflow supports multiple AI policies (PI0, GR00T N1) and can be deployed using NVIDIA Holoscan for clinical applications, providing a complete pipeline from simulation to real-world deployment.
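+
+To make these physics concrete, here is a deliberately simplified 1D pulse-echo sketch (a toy illustration, not the raysim implementation or its API): each tissue interface reflects with coefficient R = (Z2 - Z1) / (Z2 + Z1), and the returning echo is attenuated over the round trip back to the transducer. The impedance and attenuation values below are typical textbook numbers.
+
+```python
+import numpy as np
+
+# Toy 1D layered-tissue model (illustrative only, NOT the raysim API).
+impedances = np.array([1.38, 1.65, 1.38, 7.80])  # MRayl: fat, liver, fat, bone-like
+interface_depth_cm = np.array([1.0, 3.0, 5.0])   # depth of each layer boundary
+attenuation_db_per_cm = 0.5                      # bulk attenuation at ~1 MHz
+
+# Reflection coefficient at each impedance discontinuity: R = (Z2 - Z1) / (Z2 + Z1)
+r = (impedances[1:] - impedances[:-1]) / (impedances[1:] + impedances[:-1])
+
+# Round-trip (2x depth) attenuation, converted from dB to linear scale.
+atten = 10.0 ** (-attenuation_db_per_cm * 2.0 * interface_depth_cm / 20.0)
+
+# Echo amplitudes along one scan line -> one column of a B-mode image.
+for depth, echo in zip(interface_depth_cm, np.abs(r) * atten):
+    print(f"interface at {depth:.1f} cm -> echo amplitude {echo:.3f}")
+```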
+
+
 ## Table of Contents
 - [System Requirements](#system-requirements)
 - [Quick Start](#quick-start)
@@ -16,41 +28,54 @@
 
 ### Hardware Requirements
 - Ubuntu 22.04
-- NVIDIA GPU with compute capability 8.6 and 32GB of memory
+- NVIDIA GPU with compute capability 8.6 and 24GB of memory ([see NVIDIA's compute capability guide](https://developer.nvidia.com/cuda-gpus#compute))
 - GPUs without RT Cores (A100, H100) are not supported
 - 50GB of disk space
 
 ### Software Requirements
-- NVIDIA Driver Version >= 555
-- CUDA Version >= 12.6
+- [NVIDIA Driver Version >= 555](https://www.nvidia.com/en-us/drivers/)
+- [CUDA Version >= 12.6](https://developer.nvidia.com/cuda-downloads)
 - Python 3.10
-- RTI DDS License
+- [RTI DDS License](https://www.rti.com/free-trial)
 
 ## Quick Start
 
+**Note**: The setup process takes approximately 30-40 minutes to complete, depending on your system and network connection.
+
 1. Install NVIDIA driver (>= 555) and CUDA (>= 12.6)
-2. Create and activate conda environment:
+2. Install conda:
+
+   [Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/main) is recommended.
+
+3. Create and activate conda environment:
    ```bash
    conda create -n robotic_ultrasound python=3.10 -y
    conda activate robotic_ultrasound
    ```
-3. Run the setup script:
+4. Clone the repository:
+   ```bash
+   git clone https://github.com/isaac-for-healthcare/i4h-workflows.git
+   cd i4h-workflows
+   ```
+5. Run the setup script:
   ```bash
   cd 
   bash tools/env_setup_robot_us.sh --policy pi0
   ```
-4. Download assets:
+6. Download assets:
   ```bash
   i4h-asset-retrieve
   ```
-5. Set environment variables:
+7. Set environment variables:
   ```bash
-   export PYTHONPATH=`/workflows/robotic_ultrasound/scripts`
+   export PYTHONPATH=`/workflows/robotic_ultrasound/scripts:`
   export RTI_LICENSE_FILE=
   ```
 
 ## Environment Setup
 
+**Note**: The setup process takes approximately 30-40 minutes to complete, depending on your system and network connection.
+
 ### Prerequisites
 
 The robotic ultrasound workflow is built on the following dependencies:
@@ -86,8 +111,8 @@ conda activate robotic_ultrasound
 
 ##### Install Raytracing Ultrasound Simulator
 Choose one of the following options:
-- **(Experimental)** Download the pre-release version from [here](https://github.com/isaac-for-healthcare/i4h-sensor-simulation/releases/tag/v0.2.0rc1) and extract to `workflows/robotic_ultrasound/scripts/raysim`
-- **(Recommended)** Install and build following instructions in [Raytracing Ultrasound Simulator](https://github.com/isaac-for-healthcare/i4h-sensor-simulation/tree/main/ultrasound-raytracing#installation)
+- **(Use pre-built binary)** Download the pre-release version from [here](https://github.com/isaac-for-healthcare/i4h-sensor-simulation/releases/tag/v0.2.0rc2) and extract to `workflows/robotic_ultrasound/scripts/raysim`
+- **(Compile from source)** Install and build following instructions in [Raytracing Ultrasound Simulator](https://github.com/isaac-for-healthcare/i4h-sensor-simulation/tree/main/ultrasound-raytracing#installation)
 
 ##### Install All Dependencies
 The main script `tools/env_setup_robot_us.sh` installs all necessary dependencies. It first installs common base components and then policy-specific packages based on an argument.
@@ -129,7 +154,7 @@ Download the required assets using:
 i4h-asset-retrieve
 ```
 
-This will download assets to `~/.cache/i4h-assets/`.
For more details, refer to the [Asset Container Helper](https://github.com/isaac-for-healthcare/i4h-asset-catalog/blob/v0.2.0rc1/docs/catalog_helper.md). +This will download assets to `~/.cache/i4h-assets/`. For more details, refer to the [Asset Container Helper](https://github.com/isaac-for-healthcare/i4h-asset-catalog/blob/v0.2.0rc2/docs/catalog_helper.md). **Note**: During asset download, you may see warnings about blocking functions. This is expected behavior and the download will complete successfully despite these warnings. diff --git a/workflows/robotic_ultrasound/docker/Dockerfile b/workflows/robotic_ultrasound/docker/Dockerfile index 4d7f8bb6..1fc50250 100644 --- a/workflows/robotic_ultrasound/docker/Dockerfile +++ b/workflows/robotic_ultrasound/docker/Dockerfile @@ -15,104 +15,71 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM nvcr.io/nvidia/isaac-lab:2.1.0 +FROM nvidia/cuda:12.8.1-devel-ubuntu24.04 -# Apply patches to IsaacLab -COPY tools/env_setup/patches/events_random_texture.patch /tmp/ -COPY tools/env_setup/patches/from_files_semantic_tags.patch /tmp/ +SHELL ["/bin/bash", "-c"] -RUN cd /workspace/isaaclab && \ - patch -p1 < /tmp/events_random_texture.patch && \ - patch -p1 < /tmp/from_files_semantic_tags.patch && \ - rm /tmp/events_random_texture.patch /tmp/from_files_semantic_tags.patch - -WORKDIR /workspace - -# Fix livestream public endpoint address issue in 2.0.2/2.1.0 -RUN sed -i '/--\/app\/livestream\/publicEndpointAddress=/d' /workspace/isaaclab/source/isaaclab/isaaclab/app/app_launcher.py - -# Install uv using curl for openpi +# Install all packages in a single layer to avoid caching issues RUN apt-get update && \ - apt-get install -y software-properties-common && \ - add-apt-repository ppa:ubuntu-toolchain-r/test && \ - apt-get update && \ apt-get install -y \ - curl \ - openssh-client \ - cmake \ wget \ + curl \ + jq \ + vim \ + git \ + xvfb \ build-essential \ - pybind11-dev \ + cmake \ + vulkan-tools \ + unzip \ lsb-release \ libglib2.0-0 \ libdbus-1-3 \ libopengl0 \ libxcb-keysyms1 \ - libxcb-cursor0 \ - ninja-build \ - libgl1-mesa-dev \ - ffmpeg \ - gcc-12 \ - g++-12 && \ - update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ - mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts - -# Install CUDA 12.8 - -WORKDIR /tmp - -RUN apt-get update && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - apt-get update && \ - apt-get -y install cuda-toolkit-12-8 - -ENV PATH=/usr/local/cuda-12.8/bin${PATH:+:${PATH}} -ENV LD_LIBRARY_PATH=/usr/local/cuda-12.8/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - -COPY tools/env_setup/install_lerobot.sh /tmp/env_setup/ -COPY tools/env_setup/install_pi0.sh /tmp/env_setup/ -COPY tools/env_setup/install_holoscan.sh /tmp/env_setup/ -COPY tools/env_setup/install_robotic_us_ext.sh /tmp/env_setup/ - -COPY workflows/robotic_ultrasound/scripts /workspace/robotic_ultrasound/scripts - -ENV PYTHON_EXECUTABLE=/workspace/isaaclab/_isaac_sim/python.sh - -# # Set up the Simulation + libglu1-mesa && \ + rm -rf /var/lib/apt/lists/* + +# Add github.com to list of known hosts for git clone with ssh +# use COPY command instead +RUN mkdir -p ~/.ssh && \ + curl --silent https://api.github.com/meta \ + | jq 
--raw-output '"github.com "+.ssh_keys[]' >> ~/.ssh/known_hosts
+#RUN mkdir -p ~/.ssh
+#COPY ./github /root/.ssh/known_hosts
+
+# all devices should be visible
+ENV NVIDIA_VISIBLE_DEVICES=all
+# set 'compute' driver cap to use CUDA
+# set 'video' driver cap to use the video encoder
+# set 'graphics' driver cap to use OpenGL/EGL
+# set 'display' to allow use of virtual display
+ENV NVIDIA_DRIVER_CAPABILITIES=graphics,video,compute,utility,display
+
+# Install everything in a conda environment
+
+WORKDIR /workspace/i4h-workflows
+
+COPY tools /workspace/i4h-workflows/tools
+COPY tutorials /workspace/i4h-workflows/tutorials
+COPY workflows /workspace/i4h-workflows/workflows
+COPY holoscan_i4h /workspace/i4h-workflows/holoscan_i4h
+
+# Install Miniconda and create the robotic_ultrasound environment
 RUN --mount=type=ssh \
-    $PYTHON_EXECUTABLE -m pip install --no-deps \
-    git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@v0.2.0rc1 && \
-    $PYTHON_EXECUTABLE -m pip install \
-    rti.connext==7.3.0 \
-    pyrealsense2==2.55.1.6486 \
-    toml==0.10.2 \
-    dearpygui==2.0.0 \
-    setuptools==75.8.0 \
-    pydantic==2.10.6
-
-RUN mkdir -p /workspace/third_party
-
-RUN /tmp/env_setup/install_robotic_us_ext.sh /workspace/robotic_ultrasound/scripts/simulation
-
-RUN /tmp/env_setup/install_lerobot.sh /workspace/third_party/lerobot
-
-RUN /tmp/env_setup/install_pi0.sh /workspace/third_party/openpi
-
-RUN /tmp/env_setup/install_holoscan.sh /workspace/robotic_ultrasound/scripts/holoscan_apps
-
-COPY tools/env_setup/install_cosmos_transfer1.sh /tmp/env_setup/
-
-COPY tools/env_setup/install_cudnn.sh /tmp/env_setup/
-
-RUN /tmp/env_setup/install_cudnn.sh
+    mkdir -p ~/miniconda3 && \
+    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh && \
+    bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 && \
+    rm ~/miniconda3/miniconda.sh && \
+    source ~/miniconda3/bin/activate && \
+    conda init --all && \
+    conda create -n robotic_ultrasound python=3.10.14 -y
 
 RUN --mount=type=ssh \
-    /tmp/env_setup/install_cosmos_transfer1.sh /workspace/third_party/cosmos-transfer1
-
-WORKDIR /workspace/robotic_ultrasound/scripts
-
-ENV PYTHONPATH=/workspace/robotic_ultrasound/scripts:/workspace/third_party/cosmos-transfer1
+    source ~/miniconda3/bin/activate && \
+    conda activate robotic_ultrasound && \
+    cd /workspace/i4h-workflows && \
+    BUILD_DOCKER_IMAGE=true bash tools/env_setup_robot_us.sh
+ENV PYTHONPATH=/workspace/i4h-workflows/workflows/robotic_ultrasound/scripts
 ENV RTI_LICENSE_FILE=/root/rti/rti_license.dat
diff --git a/workflows/robotic_ultrasound/docker/README.md b/workflows/robotic_ultrasound/docker/README.md
index cabd1d0b..1b4a3162 100644
--- a/workflows/robotic_ultrasound/docker/README.md
+++ b/workflows/robotic_ultrasound/docker/README.md
@@ -1,36 +1,52 @@
-# Simulation LiveStream from Remote Docker Container
+# Robotic Ultrasound Docker Container
 
-This document describes how to run the simulation in a remote docker container and stream the simulation to a local machine.
+This guide provides instructions for running robotic ultrasound simulations using Docker containers with Isaac Sim.
## Prerequisites
 
-Please refer to [Livestream Clients Guide in Isaac Sim](https://docs.isaacsim.omniverse.nvidia.com/latest/installation/manual_livestream_clients.html#isaac-sim-short-webrtc-streaming-client) and download [Isaac Sim WebRTC Streaming Client](https://docs.isaacsim.omniverse.nvidia.com/latest/installation/download.html#isaac-sim-latest-release)
-
-## Build Docker Image
-
-To build the docker image, you will need to set up the SSH agent and add your SSH key to the agent, so that the docker build process can access the private repository.
-
-```bash
+- **Docker Engine**
+- **NVIDIA Docker Runtime**
+- **Git** with SSH key access to private repositories
+- **X11 forwarding** support (for GUI mode)
+- **RTI License**
+  - Please refer to the [Environment Setup](../README.md#environment-setup) for instructions to prepare the I4H assets and RTI license locally.
+  - The license file `rti_license.dat` should be saved in a directory in your host file system (e.g., `~/docker/rti`), which can be mounted to the docker container.
+- **Ultrasound Raytracing Simulator**
+  - Please refer to the [Environment Setup - Install the raytracing ultrasound simulator](../README.md#install-raytracing-ultrasound-simulator) instructions to set up the raytracing ultrasound simulator locally.
+  - The `raysim` directory should be available on your host file system (e.g., `~/raysim`) to be mounted to the docker container.
+
+## Build the Docker Image
+
+```sh
+# Clone the repository
+git clone https://github.com/isaac-for-healthcare/i4h-workflows.git
+cd i4h-workflows
+
+# Enable BuildKit
 export DOCKER_BUILDKIT=1
-eval "$(ssh-agent -s)"
-ssh-add ~/.ssh/id_ed25519 # Replace with your SSH key
-docker build --ssh default -f workflows/robotic_ultrasound/docker/Dockerfile -t robot_us:latest .
-```
-## Prepare the RTI License Locally
-
-Please refer to the [Environment Setup](../README.md#environment-setup) for instructions to prepare the I4H assets and RTI license locally.
-
-The license file `rti_license.dat` should be saved in a directory in your host file system, (e.g. `~/docker/rti`), which can be mounted to the docker container.
+# Set up SSH agent for private repository access
+eval "$(ssh-agent -s)"
+ssh-add ~/.ssh/id_ed25519  # Replace with your SSH key path
 
-## Run the Container
+# Build the Docker image with SSH forwarding
+docker build --ssh default --no-cache -f workflows/robotic_ultrasound/docker/Dockerfile -t robotic_us:latest .
+```
 
-Since we need to run multiple instances (policy runner, simulation, etc.), we need to use `-d` to run the container in detached mode.
+## Running the Container -```bash +```sh +# Allow Docker to access X11 display xhost +local:docker -docker run --name isaac-sim --entrypoint bash -itd --runtime=nvidia --gpus all -e "ACCEPT_EULA=Y" --rm --network=host \ + +# Run container with GUI support in background +docker run --name isaac-sim -itd --gpus all --rm \ + --network=host \ + --runtime=nvidia \ + --entrypoint=bash \ -e DISPLAY=$DISPLAY \ + -e "OMNI_KIT_ACCEPT_EULA=Y" \ + -e "ACCEPT_EULA=Y" \ -e "PRIVACY_CONSENT=Y" \ -v /tmp/.X11-unix:/tmp/.X11-unix \ -v ~/docker/isaac-sim/cache/kit:/isaac-sim/kit/cache:rw \ @@ -43,23 +59,111 @@ docker run --name isaac-sim --entrypoint bash -itd --runtime=nvidia --gpus all - -v ~/docker/isaac-sim/documents:/root/Documents:rw \ -v ~/.cache/i4h-assets:/root/.cache/i4h-assets:rw \ -v ~/docker/rti:/root/rti:ro \ - robot_us:latest + -v ~/raysim:/workspace/i4h-workflows/workflows/robotic_ultrasound/scripts/raysim:ro \ + robotic_us:latest ``` -### Run Policy +## Running the Simulation + +> Multiple terminals are required for running the simulation. + +### 1. Run Policy + +First, start the policy runner in the background. This process will wait for simulation data and provide control commands. ```bash docker exec -it isaac-sim bash -# Inside the container, run the policy -python policy_runner/run_policy.py +# Inside the container +conda activate robotic_ultrasound +python workflows/robotic_ultrasound/scripts/policy_runner/run_policy.py ``` -The policy runner will be running in an environment managed by `uv` located in `/workspace/openpi/.venv`. +**Note:** The policy runner should be started first since it will continuously run and communicate with the simulation via DDS. -### Run Simulation +### 2. Run Isaac Sim Simulation + +In a separate terminal session, start the main simulation: ```bash docker exec -it isaac-sim bash # Inside the container, run the simulation -python simulation/environments/sim_with_dds.py --enable_camera --livestream 2 +conda activate robotic_ultrasound +python workflows/robotic_ultrasound/scripts/simulation/environments/sim_with_dds.py --enable_camera +``` + + +### 3. Ultrasound Raytracing Simulation (Optional) + +For realistic ultrasound image generation, you can run the ultrasound raytracing simulator: + +```bash +docker exec -it isaac-sim bash +# Inside the container, run the ultrasound raytracing simulator +conda activate robotic_ultrasound +python workflows/robotic_ultrasound/scripts/simulation/examples/ultrasound_raytracing.py +``` + +This will generate and stream ultrasound images via DDS communication. + +### 4. Visualization Utility (Optional) + +To visualize the ultrasound images and other sensor data, you can use the visualization utility: + +```bash +docker exec -it isaac-sim bash +# Inside the container, run the visualization utility +conda activate robotic_ultrasound +python workflows/robotic_ultrasound/scripts/utils/visualization.py +``` + +This utility will display real-time ultrasound images and other sensor data streams. + +## Troubleshooting + +### GPU Device Errors + +- **"Failed to create any GPU devices" or "omni.gpu_foundation_factory.plugin" errors**: This indicates GPU device access issues. 
Try these fixes in order:
+
+  **Verify NVIDIA drivers and container toolkit installation**:
+  ```bash
+  # Check NVIDIA driver
+  nvidia-smi
+
+  # Check Docker can access GPU
+  docker run --rm --gpus all --runtime=nvidia nvidia/cuda:12.8.1-devel-ubuntu24.04 nvidia-smi
+  ```
+  If `--runtime=nvidia` is not working, configure the Docker daemon for the NVIDIA runtime by editing `/etc/docker/daemon.json` so that it contains the following, then restart Docker (e.g., `sudo systemctl restart docker`):
+  ```json
+  {
+    "default-runtime": "nvidia",
+    "runtimes": {
+      "nvidia": {
+        "path": "nvidia-container-runtime",
+        "runtimeArgs": []
+      }
+    }
+  }
+  ```
+
+- **Policy not responding**: Ensure the policy runner is started before the simulation and is still running in the background
+
+- **No ultrasound images**: Verify that the `raysim` directory is properly mounted and the ultrasound raytracing simulator is running
+
+- **Display issues**: Make sure `xhost +local:docker` was run before starting the container, and that you are not in a headless session (e.g., an SSH connection without X forwarding via `-X`)
+
+- **Missing assets**: Verify that the I4H assets and RTI license are properly mounted and accessible
+
+### Verification Commands
+
+After applying fixes, test with these commands:
+
+```bash
+# Test basic GPU access
+docker run --rm --gpus all nvidia/cuda:12.8.1-devel-ubuntu24.04 nvidia-smi
+
+# Test Vulkan support
+docker run --rm --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY robotic_us:latest vulkaninfo
+
+# Test OpenGL support (glxinfo is provided by the mesa-utils package; install it in the container if missing)
+docker run --rm --gpus all -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=$DISPLAY robotic_us:latest glxinfo | head -20
+```
diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/clarius_cast.py b/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/clarius_cast.py
index 84f31ac0..5a04f734 100644
--- a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/clarius_cast.py
+++ b/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_cast/clarius_cast.py
@@ -14,232 +14,14 @@
 # limitations under the License.
 
 import argparse
-import ctypes
 import os
-import sys
-from io import BytesIO
 
-import holoscan
-import numpy as np
-import rti.connextdds as dds
-from dds.schemas.usp_data import UltraSoundProbeData
-from holoscan.core import Application, Operator, OperatorSpec
+from holoscan.core import Application
 from holoscan.operators import HolovizOp
 from holoscan.resources import UnboundedAllocator
-from PIL import Image
 
-script_dir = os.path.dirname(os.path.abspath(__file__))
-
-# load the libcast.so shared library
-libcast_handle = ctypes.CDLL(f"{script_dir}/lib/libcast.so", ctypes.RTLD_GLOBAL)._handle
-# load the pyclariuscast.so shared library
-ctypes.cdll.LoadLibrary(f"{script_dir}/lib/pyclariuscast.so")
-
-sys.path.append(f"{script_dir}/lib")
-import pyclariuscast
-
-# The current image
-img = None
-
-
-def processed_image_cb(image, width, height, sz, microns_per_pixel, timestamp, angle, imu):
-    """
-    Callback function that processes a scan-converted image.
-
-    Parameters:
-        image: The processed image data.
-        width: The width of the image in pixels.
-        height: The height of the image in pixels.
-        sz: Full size of the image in bytes.
-        microns_per_pixel: Microns per pixel.
-        timestamp: The timestamp of the image in nanoseconds.
-        angle: Acquisition angle for volumetric data.
-        imu: IMU data tagged with the frame.
- """ - bpp = sz / (width * height) - - global img - - if bpp == 4: - # Handle RGBA - img = Image.frombytes("RGBA", (width, height), image) - else: - # Handle JPEG - img = Image.open(BytesIO(image)) - - -def raw_image_cb(image, lines, samples, bps, axial, lateral, timestamp, jpg, rf, angle): - """ - Callback function for raw image data. - - Parameters: - image: The raw pre scan-converted image data, uncompressed 8-bit or jpeg compressed. - lines: Number of lines in the data. - samples: Number of samples in the data. - bps: Bits per sample. - axial: Microns per sample. - lateral: Microns per line. - timestamp: The timestamp of the image in nanoseconds. - jpg: JPEG compression size if the data is in JPEG format. - rf: Flag indicating if the image is radiofrequency data. - angle: Acquisition angle for volumetric data. - """ - # Not used in sample app - return - - -def spectrum_image_cb(image, lines, samples, bps, period, micronsPerSample, velocityPerSample, pw): - """ - Callback function for spectrum image data. - - Parameters: - image: The spectral image data. - lines: The number of lines in the spectrum. - samples: The number of samples per line. - bps: Bits per sample. - period: Line repetition period of the spectrum. - micronsPerSample: Microns per sample for an M spectrum. - velocityPerSample: Velocity per sample for a PW spectrum. - pw: Flag that is True for a PW spectrum, False for an M spectrum - """ - # Not used in sample app - return - - -def imu_data_cb(imu): - """ - Callback function for IMU data. - - Parameters: - imu: Inertial data tagged with the frame. - """ - # Not used in sample app - return - - -def freeze_cb(frozen): - """ - Callback function for freeze state changes. - - Parameters: - frozen: The freeze state of the imaging system. - """ - if frozen: - print("\nClarius: Run imaging") - else: - print("\nClarius: Stop imaging") - return - - -def buttons_cb(button, clicks): - """ - Callback function for button presses. - - Parameters: - button: The button that was pressed. - clicks: The number of clicks performed. - """ - print(f"button pressed: {button}, clicks: {clicks}") - return - - -class NoOp(Operator): - """A sink operator that takes input and discards them.""" - - def setup(self, spec: OperatorSpec): - """Define input ports.""" - spec.input("input") - - def compute(self, op_input, op_output, context): - """Receive and discard input frames.""" - op_input.receive("input") - - -class ClariusCastOp(Operator): - """ - Operator to interface with a Clarius UltraSound Probe using Clarius Cast APIs. - Captures processed image data from a Clarius Probe and publishes it via DDS. - """ - - def __init__(self, fragment, *args, ip, port, domain_id, width, height, topic_out, **kwargs): - """ - Initializes the ClariusCastOp operator. - - Parameters: - fragment: The fragment this operator belongs to. - ip: IP address of the Clarius probe. - port: Port number for Clarius probe. - domain_id: Domain ID for DDS communication. - width: Width of the image in pixels. - height: Height of the image in pixels. - topic_out: The DDS topic to publish ultrasound data. 
- """ - self.ip = ip - self.port = port - self.domain_id = domain_id - self.width = width - self.height = height - self.topic_out = topic_out - super().__init__(fragment, *args, **kwargs) - - def setup(self, spec: OperatorSpec): - """Set up the output for this operator.""" - spec.output("output") - - def start(self): - """Initialize and start the Clarius Cast connection and DDS publisher.""" - # initialize - path = os.path.expanduser("~/") - cast = pyclariuscast.Caster( - processed_image_cb, raw_image_cb, imu_data_cb, spectrum_image_cb, freeze_cb, buttons_cb - ) - self.cast = cast - ret = cast.init(path, self.width, self.height) - - if ret: - print("Initialization succeeded") - # Use JPEG format - JPEG = 2 - ret = cast.setFormat(JPEG) - if ret: - print("Setting format to JPEG") - else: - print("Failed setting format to JPEG") - # unload the shared library before destroying the cast object - ctypes.CDLL("libc.so.6").dlclose(libcast_handle) - cast.destroy() - exit() - - ret = cast.connect(self.ip, self.port, "research") - if ret: - print(f"Connected to {self.ip} on port {self.port}") - else: - print("Connection failed") - # unload the shared library before destroying the cast object - ctypes.CDLL("libc.so.6").dlclose(libcast_handle) - cast.destroy() - exit() - else: - print("Initialization failed") - return - - dp = dds.DomainParticipant(domain_id=self.domain_id) - topic = dds.Topic(dp, self.topic_out, UltraSoundProbeData) - self.writer = dds.DataWriter(dp.implicit_publisher, topic) - - def compute(self, op_input, op_output, context): - """Process the current image and publish it to DDS.""" - global img - - if img is None: - return - - image = np.array(img) - d = UltraSoundProbeData() - d.data = image.tobytes() - self.writer.write(d) - out_message = {"image": holoscan.as_tensor(image)} - op_output.emit(out_message, "output") +from holoscan_i4h.operators.clarius_cast.clarius_cast import ClariusCastOp +from holoscan_i4h.operators.no_op.no_op import NoOp class ClariusCastApp(Application): diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/clarius_solum.py b/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/clarius_solum.py index 13f5d590..a46ef95b 100644 --- a/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/clarius_solum.py +++ b/workflows/robotic_ultrasound/scripts/holoscan_apps/clarius_solum/clarius_solum.py @@ -14,282 +14,14 @@ # limitations under the License. import argparse -import ctypes import os -import sys -from io import BytesIO -from time import sleep -import holoscan -import numpy as np -import rti.connextdds as dds -from dds.schemas.usp_data import UltraSoundProbeData -from holoscan.core import Application, Operator, OperatorSpec +from holoscan.core import Application from holoscan.operators import HolovizOp from holoscan.resources import UnboundedAllocator -from PIL import Image -script_dir = os.path.dirname(os.path.abspath(__file__)) - -# load the libsolum.so shared library -libsolum_handle = ctypes.CDLL(f"{script_dir}/lib/libsolum.so", ctypes.RTLD_GLOBAL)._handle - -sys.path.append(f"{script_dir}/lib") -import pysolum - -# Is probe connected -connected = False -# Probe is running -running = False -# The current image -img = None - - -def connect_cb(res, port, status): - """ - Callback function for connection events. - - Parameters: - res: The connection result. - port: UDP port used for streaming. - status: The status message. 
- """ - print(f"Connection: {res} {status}") - if res == pysolum.CusConnection.ProbeConnected: - global connected - connected = True - print(f"Streaming on port: {port}") - - -def cert_cb(days_valid): - """ - Callback function when the certificate is set. - - Parameters: - days_valid: Number of days the certificate is valid. - """ - print(f"Certificate valid for {days_valid} days.") - - -def power_down_cb(res, tm): - """ - Callback function when the device is powered down. - - Parameters: - res: The power down reason. - tm: Time in seconds until probe powers down, 0 for immediate shutdown. - """ - print(f"Powering down: {res} in {tm} seconds") - - -def processed_image_cb(image, width, height, size, micros_per_pixel, origin_x, origin_y, fps): - """ - Callback function when a new processed image is streamed. - - Parameters: - image: The scan-converted image data. - width: Image width in pixels. - height: Image height in pixels. - size: Full size of the image. - microns_per_pixel: Microns per pixel. - origin_x: Image origin in microns (horizontal axis). - origin_y: Image origin in microns (vertical axis). - fps: Frames per second. - """ - bpp = size / (width * height) - - global img - - if bpp == 4: - img = Image.frombytes("RGBA", (width, height), image).convert("L") - else: - img = Image.open(BytesIO(image)).convert("L") - - -def imu_port_cb(port): - """ - Callback function for new IMU data streaming port. - - Parameters: - port: The new IMU data UDP streaming port. - """ - # Not used in sample code - return - - -def imu_data_cb(pos): - """ - Callback function for new IMU data. - - Parameters: - pos: Positional information data tagged with the image. - """ - # Not used in sample code - return - - -def imaging_cb(state, imaging): - """ - Imaging callback function. - - Parameters: - state: The imaging state. - imaging: 1 if running, 0 if stopped. - """ - if imaging == 0: - print(f"State: {state} imaging: Stopped") - else: - global running - running = True - print(f"State: {state} imaging: Running") - - -def error_cb(code, msg): - """ - Callback function for error events. - - Parameters: - code: Error code associated with the error. - msg: The error message. - """ - print(f"Error: {code}: {msg}") - - -def buttons_cb(button, clicks): - """ - Callback function when a button is pressed. - - Parameters: - button: The button that was pressed. - clicks: Number of clicks performed. - """ - print(f"button pressed: {button}, clicks: {clicks}") - - -class NoOp(Operator): - """A sink operator that takes input and discards them.""" - - def setup(self, spec: OperatorSpec): - """Define input ports.""" - spec.input("input") - - def compute(self, op_input, op_output, context): - """Receive and discard input frames.""" - op_input.receive("input") - - -class ClariusSolumOp(Operator): - """ - Operator to interface with a Clarius UltraSound Probe using Clarius Solum APIs. - Captures processed image data from a Clarius Probe and publishes it via DDS. - """ - - def __init__(self, fragment, *args, ip, port, cert, model, app, domain_id, width, height, topic_out, **kwargs): - """ - Initializes the ClariusSolumOp operator. - - Parameters: - fragment: The fragment this operator belongs to. - ip: IP address of the Clarius probe. - port: Port number for Clarius probe. - cert: Path to the probe certificate. - model: The Clarius probe model name. - app: The ultrasound application to perform. - domain_id: Domain ID for DDS communication. - width: Width of the image in pixels. - height: Height of the image in pixels. 
- topic_out: The DDS topic to publish ultrasound data. - """ - self.ip = ip - self.port = port - self.cert = cert - self.model = model - self.app = app - self.domain_id = domain_id - self.width = width - self.height = height - self.topic_out = topic_out - super().__init__(fragment, *args, **kwargs) - - def setup(self, spec: OperatorSpec): - """Set up the output for this operator.""" - spec.output("output") - - def stop(self): - """Stops imaging on the Clarius probe.""" - print("Clarius: Stop imaging") - self.solum.stop_imaging() - - def start(self): - """ - Initializes and starts the Clarius Solum API connection. - Establishes a connection, loads the application, and starts imaging. - """ - # initialize - path = os.path.expanduser("~/") - - solum = pysolum.Solum( - connect_cb, - cert_cb, - power_down_cb, - processed_image_cb, - imu_port_cb, - imu_data_cb, - imaging_cb, - buttons_cb, - error_cb, - ) - - self.solum = solum - ret = solum.init(path, self.width, self.height) - - if ret: - global connected - - solum.set_certificate(self.cert) - ret = solum.connect(self.ip, self.port, "research") - - while not connected: - sleep(1) - - if ret: - print(f"Connected to {self.ip} on port {self.port}") - else: - print("Connection failed") - # unload the shared library before destroying the solum object - ctypes.CDLL("libc.so.6").dlclose(libsolum_handle) - solum.destroy() - exit() - - solum.load_application(self.model, self.app) - sleep(5) - solum.run_imaging() - - while not running: - sleep(1) - - else: - print("Initialization failed") - return - - dp = dds.DomainParticipant(domain_id=self.domain_id) - topic = dds.Topic(dp, self.topic_out, UltraSoundProbeData) - self.writer = dds.DataWriter(dp.implicit_publisher, topic) - print(f"Creating writer for topic: {self.domain_id}:{self.topic_out}") - - def compute(self, op_input, op_output, context): - """Processes the current ultrasound image and publishes it via DDS and to the downstream operator.""" - global img - - if img is None: - return - - image = np.array(img) - d = UltraSoundProbeData() - d.data = image.tobytes() - self.writer.write(d) - out_message = {"image": holoscan.as_tensor(image)} - op_output.emit(out_message, "output") +from holoscan_i4h.operators.clarius_solum.clarius_solum import ClariusSolumOp +from holoscan_i4h.operators.no_op.no_op import NoOp class ClariusSolumApp(Application): diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/examples/README.md b/workflows/robotic_ultrasound/scripts/holoscan_apps/examples/README.md deleted file mode 100644 index 0a805472..00000000 --- a/workflows/robotic_ultrasound/scripts/holoscan_apps/examples/README.md +++ /dev/null @@ -1 +0,0 @@ -This folder contains the examples to execute the holoscan sensor apps, and connect with the `policy_runner`. 
diff --git a/workflows/robotic_ultrasound/scripts/holoscan_apps/realsense/camera.py b/workflows/robotic_ultrasound/scripts/holoscan_apps/realsense/camera.py index 279378ad..2bf0a0cd 100644 --- a/workflows/robotic_ultrasound/scripts/holoscan_apps/realsense/camera.py +++ b/workflows/robotic_ultrasound/scripts/holoscan_apps/realsense/camera.py @@ -16,147 +16,13 @@ import argparse import os -import holoscan -import numpy as np -import pyrealsense2 as rs -import rti.connextdds as dds -from dds.schemas.camera_info import CameraInfo from holoscan.conditions import CountCondition -from holoscan.core import Application, Operator -from holoscan.core._core import OperatorSpec +from holoscan.core import Application from holoscan.operators.holoviz import HolovizOp from holoscan.resources import UnboundedAllocator - -class RealsenseOp(Operator): - """ - Operator to interface with an Intel RealSense camera. - Captures RGB and depth frames, optionally publishing them via DDS. - """ - - def __init__( - self, - fragment, - *args, - domain_id, - width, - height, - topic_rgb, - topic_depth, - device_idx, - framerate, - show_holoviz, - **kwargs, - ): - """ - Initialize the RealSense operator. - - Parameters: - - domain_id (int): DDS domain ID. - - width (int): Width of the camera stream. - - height (int): Height of the camera stream. - - topic_rgb (str): DDS topic for RGB frames. - - topic_depth (str): DDS topic for depth frames. - - device_idx (int): Camera device index. - - framerate (int): Frame rate for the camera stream. - - show_holoviz (bool): Whether to display frames using Holoviz. - """ - self.domain_id = domain_id - self.width = width - self.height = height - self.topic_rgb = topic_rgb - self.topic_depth = topic_depth - self.device_idx = device_idx - self.framerate = framerate - self.show_holoviz = show_holoviz - super().__init__(fragment, *args, **kwargs) - - self.pipeline = rs.pipeline() - self.rgb_writer = None - self.depth_writer = None - - def setup(self, spec: OperatorSpec): - """Define the output ports for the operator.""" - if self.topic_rgb: - spec.output("color") - if self.topic_depth and not self.show_holoviz: - spec.output("depth") - - def start(self): - """ - Configure and start the RealSense camera pipeline. - Sets up DDS writers if topics are provided. - """ - config = rs.config() - context = rs.context() - for device in context.query_devices(): - print(f"+++ Available device: {device}") - - if self.device_idx is not None: - if self.device_idx < len(context.query_devices()): - config.enable_device(context.query_devices()[self.device_idx].get_info(rs.camera_info.serial_number)) - else: - print(f"Ignoring input device_idx: {self.device_idx}") - - dp = dds.DomainParticipant(domain_id=self.domain_id) - if self.topic_rgb: - print("Enabling color...") - self.rgb_writer = dds.DataWriter(dp.implicit_publisher, dds.Topic(dp, self.topic_rgb, CameraInfo)) - config.enable_stream( - rs.stream.color, - width=self.width, - height=self.height, - format=rs.format.rgba8, - framerate=self.framerate, - ) - if self.topic_depth: - print("Enabling depth...") - config.enable_stream( - rs.stream.depth, - width=self.width, - height=self.height, - format=rs.format.z16, - framerate=self.framerate, - ) - self.depth_writer = dds.DataWriter(dp.implicit_publisher, dds.Topic(dp, self.topic_depth, CameraInfo)) - self.pipeline.start(config) - - def compute(self, op_input, op_output, context): - """ - Capture frames from the RealSense camera and publish them via DDS. 
- """ - frames = self.pipeline.wait_for_frames() - color = None - if self.rgb_writer: - color = np.asanyarray(frames.get_color_frame().get_data()) - self.rgb_writer.write(CameraInfo(data=color.tobytes(), width=self.width, height=self.height)) - op_output.emit({"color": holoscan.as_tensor(color)} if self.show_holoviz else True, "color") - if self.depth_writer: - depth = np.asanyarray(frames.get_depth_frame().get_data()).astype(np.float32) / 1000.0 - self.depth_writer.write(CameraInfo(data=depth.tobytes(), width=self.width, height=self.height)) - if not self.show_holoviz: - op_output.emit({"depth": holoscan.as_tensor(depth)}, "depth") - - -class NoOp(Operator): - """A sink operator that takes input and discards them.""" - - def __init__(self, fragment, depth, *args, **kwargs): - """Initialize the operator.""" - self.depth = depth - super().__init__(fragment, *args, **kwargs) - - def setup(self, spec: OperatorSpec): - """Define input ports for color and optional depth.""" - spec.input("color") - if self.depth: - spec.input("depth") - - def compute(self, op_input, op_output, context): - """Receive and discard input frames.""" - op_input.receive("color") - if self.depth: - op_input.receive("depth") +from holoscan_i4h.operators.no_op.no_op import NoOp +from holoscan_i4h.operators.realsense.realsense import RealsenseOp class RealsenseApp(Application): @@ -201,8 +67,12 @@ def compose(self): ) self.add_flow(camera, holoviz, {("color", "receivers")}) else: - port_map = {("color", "color"), ("depth", "depth")} if self.topic_depth else {("color", "color")} - self.add_flow(camera, NoOp(self, True if self.topic_depth else None), port_map) + if self.topic_depth: + noop = NoOp(self, ["color", "depth"]) + self.add_flow(camera, noop, {("color", "color"), ("depth", "depth")}) + else: + noop = NoOp(self, ["color"]) + self.add_flow(camera, noop) def main(): diff --git a/workflows/robotic_ultrasound/scripts/simulation/README.md b/workflows/robotic_ultrasound/scripts/simulation/README.md index d8147329..b58354b7 100644 --- a/workflows/robotic_ultrasound/scripts/simulation/README.md +++ b/workflows/robotic_ultrasound/scripts/simulation/README.md @@ -45,9 +45,10 @@ Please refer to the [Environment Setup - Set environment variables before runnin #### Run the script -Please move to the current [`simulation` folder](./) and execute: +Please cd to the current [`simulation` folder](./) and execute: ```sh +cd workflows/robotic_ultrasound/scripts/simulation python imitation_learning/pi0_policy/eval.py --enable_camera ``` @@ -68,9 +69,10 @@ Please refer to the [Environment Setup - Set environment variables before runnin #### Run the script -Please move to the current [`simulation` folder](./) and execute: +Please cd to the current [`simulation` folder](./) and execute: ```sh +cd ../simulation python environments/sim_with_dds.py --enable_cameras ``` @@ -119,9 +121,10 @@ The state machine integrates multiple control modules: Please refer to the [Environment Setup - Set environment variables before running the scripts](../../README.md#set-environment-variables-before-running-the-scripts) to set the `PYTHONPATH`. 
-Then please move to the current [`simulation` folder](./) and execute:
+Then please cd to the current [`simulation` folder](./) and execute:
 
 ```sh
+cd ../simulation
 python environments/state_machine/liver_scan_sm.py --enable_cameras
 ```
@@ -185,7 +188,7 @@ Ensure your `PYTHONPATH` is set up as described in the [Environment Setup - Set
 
 Navigate to the [`simulation` folder](./) and execute:
 
 ```sh
-python environments/state_machine/replay_recording.py --hdf5_path /path/to/your/hdf5_data_directory --task <task_name>
+python environments/state_machine/replay_recording.py /path/to/your/hdf5_data_directory --task <task_name> --enable_camera
 ```
 
 Replace `/path/to/your/hdf5_data_directory` with the actual path to the directory containing your `data_*.hdf5` files or single HDF5 file, and `<task_name>` with the task name used during data collection (e.g., `Isaac-Teleop-Torso-FrankaUsRs-IK-RL-Rel-v0`).
@@ -207,23 +210,30 @@
 
 [Cosmos-Transfer1](https://github.com/nvidia-cosmos/cosmos-transfer1) is a world-to-world transfer model designed to bridge the perceptual divide between simulated and real-world environments. We introduce a training-free guided generation method on top of Cosmos-Transfer1 to overcome unsatisfactory results on unseen healthcare simulation assets. Directly applying Cosmos-Transfer with various control inputs results in unsatisfactory outputs for the human phantom and robotic arm (see bottom figure). In contrast, our guided generation method preserves the appearance of the phantom and robotic arm while generating diverse backgrounds.
+
 This training-free guided generation approach works by encoding simulation videos into the latent space and applying spatial masking to guide the generation process. The trade-off between realism and faithfulness can be controlled by adjusting the number of guided denoising steps. In addition, our generation pipeline supports multi-view video generation. We first leverage the camera information to warp the generated room view to the wrist view, then use it as the guidance for wrist-view generation.
 
 #### Download Cosmos-transfer1 Checkpoints
 
-Please install cosmos-transfer1 dependency and move to the third party `cosmos-transfer1` folder. The following command downloads the checkpoints:
+The cosmos-transfer1 dependency is already installed after completing the [Installation](#installation) section. Please navigate to the third party `cosmos-transfer1` folder and run the following command to download the checkpoints:
 
 ```sh
-conda activate cosmos-transfer1
+cd third_party/cosmos-transfer1
 CUDA_HOME=$CONDA_PREFIX PYTHONPATH=$(pwd) python scripts/download_checkpoints.py --output_dir checkpoints/
 ```
+
+> **Note:** You need to be logged in to Hugging Face (`huggingface-cli login`) before running the download script. Additionally, for the `meta-llama/Llama-Guard-3-8B` model, you need to request additional access on the [Hugging Face model page](https://huggingface.co/meta-llama/Llama-Guard-3-8B) before you can download it.
+
+
+
 #### Video Prompt Generation
 
 We follow the idea in [lucidsim](https://github.com/lucidsim/lucidsim) to first generate batches of meta prompts, each containing a very concise description of the potential scene, then instruct the LLM (e.g., [gemma-3-27b-it](https://build.nvidia.com/google/gemma-3-27b-it)) to upsample the meta prompt with detailed descriptions.
We provide example prompts in [`generated_prompts_two_seperate_views.json`](./environments/cosmos_transfer1/config/generated_prompts_two_seperate_views.json).
 
 #### Running Cosmos-transfer1 + Guided Generation
 
-Please move to the current [`simulation` folder](./) and execute the following command to start the generation pipeline:
+Please cd back to the current [simulation folder](./) and execute the following command to start the generation pipeline:
 
 ```sh
+cd ../../workflows/robotic_ultrasound/scripts/simulation/
 export CHECKPOINT_DIR="path to downloaded cosmos-transfer1 checkpoints"
 # Set project root path
 export PROJECT_ROOT="{your path}/i4h-workflows"
@@ -280,9 +290,10 @@ The teleoperation interface allows direct control of the robotic arm using vario
 
 #### Running Teleoperation
 
-Please move to the current [`simulation` folder](./) and execute the following command to start the teleoperation:
+Please cd to the current [`simulation` folder](./) and execute the following command to start the teleoperation:
 
 ```sh
+cd ../simulation
 python environments/teleoperation/teleop_se3_agent.py --enable_cameras
 ```
@@ -331,7 +342,7 @@ based on 3D meshes. The simulator uses Holoscan framework and DDS communication
 
 #### NVIDIA OptiX Raytracing with Python Bindings
 
-For instructions on preparing and building the Python module, please refer to the [Environment Setup](../../README.md#install-the-raytracing-ultrasound-simulator) instructions and [Ultrasound Raytracing README](https://github.com/isaac-for-healthcare/i4h-sensor-simulation/blob/v0.2.0rc1/ultrasound-raytracing/README.md) to setup the `raysim` module correctly.
+For instructions on preparing and building the Python module, please refer to the [Environment Setup](../../README.md#install-the-raytracing-ultrasound-simulator) instructions and [Ultrasound Raytracing README](https://github.com/isaac-for-healthcare/i4h-sensor-simulation/blob/v0.2.0rc2/ultrasound-raytracing/README.md) to set up the `raysim` module correctly.
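+
+After building or extracting the simulator, you can quickly confirm that the module is discoverable on your `PYTHONPATH` (a minimal check; it assumes the module keeps the `raysim` name used by the default layout above):
+
+```python
+# Check that the raysim module can be located without importing the full simulator.
+import importlib.util
+
+spec = importlib.util.find_spec("raysim")
+if spec is None:
+    print("raysim not found - check PYTHONPATH and the extraction path")
+else:
+    print(f"raysim found at: {spec.origin}")
+```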
#### Configuration diff --git a/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py b/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py index f0c3cf85..174328a8 100644 --- a/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py +++ b/workflows/robotic_ultrasound/scripts/simulation/environments/state_machine/replay_recording.py @@ -42,14 +42,17 @@ app_launcher = AppLauncher(args_cli) simulation_app = app_launcher.app - +# isort: off import gymnasium as gym import torch from isaaclab_tasks.utils.parse_cfg import parse_env_cfg + # Import extensions to set up environment tasks from robotic_us_ext import tasks # noqa: F401 from simulation.environments.state_machine.utils import reset_scene_to_initial_state, validate_hdf5_path +# isort: on + def main(): """Main function.""" diff --git a/workflows/robotic_ultrasound/scripts/simulation/environments/teleoperation/teleop_se3_agent.py b/workflows/robotic_ultrasound/scripts/simulation/environments/teleoperation/teleop_se3_agent.py index a54bfa13..42ead05e 100644 --- a/workflows/robotic_ultrasound/scripts/simulation/environments/teleoperation/teleop_se3_agent.py +++ b/workflows/robotic_ultrasound/scripts/simulation/environments/teleoperation/teleop_se3_agent.py @@ -282,7 +282,8 @@ def stop_teleoperation(): zero_out_xy_rotation=False, use_wrist_position=True, use_wrist_rotation=True, - delta_rot_scale_factor=3.0, + delta_rot_scale_factor=8.0, + delta_pos_scale_factor=8.0, ) grip_retargeter = GripperRetargeter(bound_hand=OpenXRDevice.TrackingTarget.HAND_RIGHT) @@ -301,7 +302,7 @@ def stop_teleoperation(): def process_hand_tracking_pose(delta_pose: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: # Define the transformation matrix (preprocessing step for wrist frame to world frame) transform_matrix = torch.tensor( - [[0, 0, 1, 0], [0, 1, 0, 0], [-1, 0, 0, 0], [0, 0, 0, 1]], + [[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=torch.float32, device=env.unwrapped.device, ) @@ -314,7 +315,7 @@ def process_hand_tracking_pose(delta_pose: torch.Tensor) -> tuple[torch.Tensor, delta_pos = delta_pos_4d[:, :3] # Process hand tracking rotation (wrist -> world) - delta_rot = math_utils.quat_from_euler_xyz(-delta_pose[:, 3], delta_pose[:, 4], -delta_pose[:, 5]) + delta_rot = math_utils.quat_from_euler_xyz(delta_pose[:, 3], delta_pose[:, 4], delta_pose[:, 5]) delta_rot_matrix = math_utils.matrix_from_quat(delta_rot) delta_rot_matrix = torch.matmul( transform_matrix[:3, :3], torch.matmul(delta_rot_matrix, transform_matrix[:3, :3].T) diff --git a/workflows/robotic_ultrasound/scripts/simulation/evaluation/README.md b/workflows/robotic_ultrasound/scripts/simulation/evaluation/README.md index bca147ee..d19d2633 100644 --- a/workflows/robotic_ultrasound/scripts/simulation/evaluation/README.md +++ b/workflows/robotic_ultrasound/scripts/simulation/evaluation/README.md @@ -112,28 +112,31 @@ The `evaluate_trajectories.py` script generates several outputs to help you asse ### Example Evaluation Table -In our experiments, we utilized the `liver_scan_sm.py` script to collect an initial dataset of 400 raw trajectories. This dataset was then augmented using the Cosmos-transfer1 model to generate an additional 400 trajectories with diverse visual appearances (1:1 ratio with raw data), effectively creating a combined dataset for training and evaluation. 
The following table presents a comparison of success rates (at a 0.01m radius) for different policy models (Pi0 and GR00T-N1 variants) evaluated under various texture conditions in the simulated environment. +In our experiments, we utilized the `liver_scan_sm.py` script to collect an initial dataset of 400 raw trajectories. This dataset was then augmented using the Cosmos-transfer1 model to generate an additional 400 trajectories with diverse visual appearances (1:1 ratio with raw data), effectively creating a combined dataset for training and evaluation. The following table presents a comparison of success rates (at a 0.01m radius) for different policy models (Pi0 and GR00T-N1 variants) evaluated under various texture conditions in the simulated environment. Models with **-rel** suffix use **relative action space**, while models with **-abs** suffix use **absolute action space**. All models are trained using **full fine-tuning** (no LoRA). Our model was tested on both the original texture and several unseen textures. To enable these additional textures for testing, uncomment the `table_texture_randomizer` setting within the [environment configuration file](../exts/robotic_us_ext/robotic_us_ext/tasks/ultrasound/approach/config/franka/franka_manager_rl_env_cfg.py). **Evaluation Table: Success Rates (%) (@0.01m)** | Model | Original Texture | Texture 1 (Stainless Steel) | Texture 2 (Bamboo Wood) | Texture 3 (Walnut Wood) | |---------------------------------------|------------------|-----------------------------|-------------------------|-------------------------| -| Pi0-400 | 77.1 | 57.3 | 47.7 | 55.7 | -| Pi0-800 (w/ cosmos) | 77.0 | 71.7 | 72.4 | 70.5 | -| GR00T-N1-400 | 84.1 | 61.5 | 58.3 | 64.0 | -| GR00T-N1-800 (w/ cosmos) | 92.8 | 91.1 | 92.8 | 91.7 | +| Pi0-400-rel | 84.5 | 61.2 | 63.4 | 59.6 | +| GR00T-N1-400-rel | 84.1 | 61.5 | 58.3 | 64.0 | +| Pi0-800-rel (w/ cosmos) | 90.0 | 77.6 | 83.1 | 84.8 | +| GR00T-N1-800-rel (w/ cosmos) | 92.8 | 91.1 | 92.8 | 91.7 | +| Pi0-400-abs | 96.5 | 97.0 | 96.3 | 11.6 | +| GR00T-N1-400-abs | 99.3 | 10.6 | 19.1 | 20.4 | +| Pi0-800-abs (w/ cosmos) | 97.7 | 94.5 | 95.8 | 93.8 | +| GR00T-N1-800-abs (w/ cosmos) | 98.8 | 85.1 | 84.7 | 87.6 | ### Success Rate vs. Radius Plot - A plot named by the `--saved_compare_name` argument (default: `comparison_success_rate_vs_radius.png`) is saved in the `data_root` directory. - This plot shows the mean success rate (y-axis) as a function of the test radius (x-axis) for all configured prediction methods. - It includes 95% confidence interval bands for each method. -| Original Texture | Texture 1 (Stainless Steel) | Texture 2 (Bamboo Wood) | Texture 3 (Walnut Wood) | -|------------------|-----------------------------|-------------------------|-------------------------| -| ![Original Texture](../../../../../docs/source/comparison_avg_success_rate_vs_radius_original.png) | ![Stainless Texture](../../../../../docs/source/comparison_avg_success_rate_vs_radius_metalic.png) | ![Bamboo Wood](../../../../../docs/source/comparison_avg_success_rate_vs_radius_bamboo.png) | ![Walnut Wood](../../../../../docs/source/comparison_avg_success_rate_vs_radius_walnut.png) | + **Example Success Rate vs. 
Radius Plots:** + ![Success Rate vs Radius Example](../../../../../docs/source/comparison_avg_success_rate_vs_radius_original.png) -The plots visually represent these comparisons, where different models are typically color-coded (e.g., Green for the original Pi0 model, Red for Pi0 with Cosmos-transfer, Blue for the original GR00T-N1 model, and Yellow for GR00T-N1 with Cosmos-transfer). The x-axis represents the tolerance radius in meters, and the y-axis shows the corresponding mean success rate. The shaded areas around the lines indicate the 95% confidence intervals, providing a measure of result variability. +The example plot visually represents comparisons between different models, where each method is color-coded. The x-axis represents the tolerance radius in meters, and the y-axis shows the corresponding mean success rate. The shaded areas around the lines indicate the 95% confidence intervals, providing a measure of result variability. ### 3D Trajectory Plots - For each episode and each prediction method, a 3D plot is generated and saved. @@ -141,22 +144,18 @@ The plots visually represent these comparisons, where different models are typic - These plots visually compare the ground truth trajectory against the predicted trajectory. - The title of each plot includes the episode number, method name, success rate at `radius_for_plots`, and average minimum distance. - **Example 3D Trajectory Visualizations:** - To provide a qualitative view, example 3D trajectory visualizations from a selected episode (e.g., episode 14) are presented below for each model. + **Example 3D Trajectory Visualization:** - | Pi0-400 | Pi0-800 (w/ cosmos) | GR00T-N1-400 | GR00T-N1-800 (w/ cosmos) | - |------------------|-----------------------------|-------------------------|-------------------------| - | ![Pi0-400](../../../../../docs/source/3d_trajectories-5_Texture2-Pi0-wo.png) | ![Pi0-800](../../../../../docs/source/3d_trajectories-5_Texture2-Pi0-w.png) | ![GR00T-400](../../../../../docs/source/3d_trajectories-5_Texture2-GR00T-wo.png) | ![GR00T-800](../../../../../docs/source/3d_trajectories-5_Texture2-GR00T-w.png) | + ![3D Trajectory Example](../../../../../docs/source/3d_trajectories-5_Texture2-Pi0-w.png) -In these visualizations, the ground truth trajectory (derived from the 'scan' state) is depicted in black, while the colored line represents the predicted trajectory from the model. + In this visualization, the ground truth trajectory (derived from the 'scan' state) is depicted in black, while the colored line represents the predicted trajectory from the model. ### Key Observations and Conclusion -The evaluation results highlight several important findings: +The evaluation results from our experiments offer several insights into model performance. Models are trained with either relative action space (-rel suffix) or absolute action space (-abs suffix), all using full fine-tuning. -* **Impact of Cosmos-transfer:** Augmenting the training dataset with Cosmos-transfer (as seen in Pi0-800 and GR00T-N1-800 models) consistently and significantly improves the policy's success rate and robustness to diverse visual textures compared to models trained on original data alone (Pi0-400 and GR00T-N1-400). For instance, GR00T-N1-800 (w/ cosmos) maintains a success rate above 90% across all tested textures, a substantial improvement over GR00T-N1-400 which sees a performance drop on some textures. -* **Model Comparison:** The GR00T-N1 architecture generally outperforms the Pi0 architecture. 
The GR00T-N1-800 model, benefiting from both the advanced architecture and cosmos augmented data, demonstrates the highest overall performance and consistency according to the provided data.
-* **Performance under Texture Variation:** Models trained without sufficient diverse data (e.g., Pi0-400, GR00T-N1-400) exhibit a noticeable degradation in performance when encountering textures different from the original training environment. Cosmos-transfer effectively mitigates this issue.
-* **Success Rate vs. Radius Insights:** The success rate vs. radius plots are expected to further substantiate these findings. Models enhanced by Cosmos-transfer (notably GR00T-N1-800, potentially depicted by a yellow line as per the convention mentioned) would likely maintain higher success rates even at stricter (smaller) radius, indicating greater precision. Their 95% confidence intervals also provide insight into the stability of these performance gains.
+* **Effect of Cosmos-transfer Data Augmentation:** In our tests, augmenting the training dataset with Cosmos-transfer appeared to enhance policy success rates and robustness to the three tested unseen table textures when compared to models trained solely on the original dataset. For example, the GR00T-N1-800-rel model showed more consistent performance across tested textures. Data augmentation, while beneficial for diversity, does require additional computational resources for generating and processing the augmented samples.

-These observations underscore the value of diverse, augmented datasets like those generated by Cosmos-transfer for training robust robotic policies, particularly for tasks involving visual perception in variable environments. The GR00T-N1 model, when combined with such data augmentation, shows promising results for reliable trajectory execution.
+* **Reproducibility and Result Variability:** Users conducting their own evaluations might observe slightly different numerical results. This can be due to several factors, including inherent stochasticity in deep learning model training, variations in computational environments, and specific versions of software dependencies. For instance, initial explorations indicated that components like the `PaliGemma.llm` from OpenPI ([link](https://github.com/Physical-Intelligence/openpi/blob/main/src/openpi/models/pi0.py#L311)) could introduce variability. To ensure the stability and reliability of the findings presented here, the reported metrics for each model are an average of three independent evaluation runs.
+
+These observations highlight the potential benefits of data augmentation strategies like Cosmos-transfer for developing robotic policies, especially for tasks involving visual perception in dynamic environments. The choice of model architecture, training duration, and training methodology (e.g., relative vs. absolute action space, whether to employ LoRA, and whether to fine-tune the LLM) are all important factors influencing final performance. Further investigation and testing across a wider range of scenarios are always encouraged.

diff --git a/workflows/telesurgery/README.md b/workflows/telesurgery/README.md
index 489e107f..bf6c74dc 100644
--- a/workflows/telesurgery/README.md
+++ b/workflows/telesurgery/README.md
@@ -1,162 +1,195 @@
 # Telesurgery Workflow

 ![Telesurgery Workflow](../../docs/source/telesurgery_workflow.jpg)

+The Telesurgery Workflow is a cutting-edge solution designed for healthcare professionals and researchers working in the field of remote surgical procedures.
This workflow provides a comprehensive framework for enabling and analyzing remote surgical operations, leveraging NVIDIA's advanced GPU capabilities to ensure real-time, high-fidelity surgical interactions across distances. It enables surgeons to perform complex procedures remotely, researchers to develop new telemedicine techniques, and medical institutions to expand their reach to underserved areas. By offering a robust platform for remote surgical operations, this workflow helps improve healthcare accessibility, reduce geographical barriers to specialized care, and advance the field of telemedicine.

-## Table of Contents
-- [System Requirements](#system-requirements)
-- [Quick Start](#quick-start)
-- [Running the Workflow](#running-the-workflow)
-- [Licensing](#licensing)

-## System Requirements
+- [Telesurgery Workflow](#telesurgery-workflow)
+  - [Prerequisites](#prerequisites)
+    - [System Requirements](#system-requirements)
+  - [Common Setup](#common-setup)
+  - [Running the System](#running-the-system)
+    - [Real World Environment](#real-world-environment)
+    - [Simulation Environment](#simulation-environment)
+  - [Advanced Configuration](#advanced-configuration)
+    - [NTP Server Setup](#ntp-server-setup)
+    - [NVIDIA Video Codec Configuration](#advanced-nvidia-video-codec-configuration)
+  - [Troubleshooting](#troubleshooting)
+    - [Common Issues](#common-issues)
+  - [Licensing](#licensing)

-### Hardware Requirements
-- Ubuntu 22.04
-- NVIDIA GPU with compute capability 8.6 and 32GB of memory
-  - GPUs without RT Cores, such as A100 and H100, are not supported
-- 50GB of disk space

-### Software Requirements
-- NVIDIA Driver Version >= 555
-- CUDA Version >= 12.6
-- Python 3.10
-- RTI DDS License

+## Prerequisites

-## Quick Start
+### System Requirements

-### x86 & AARCH64 (IGX) Setup
+#### Hardware Requirements
+- Ubuntu >= 22.04
+- NVIDIA GPU with compute capability 8.6 and 24GB of memory ([see NVIDIA's compute capability guide](https://developer.nvidia.com/cuda-gpus#compute))
+  - GPUs without RT Cores, such as A100 and H100, are not supported
+- 50GB of disk space
+- Xbox Controller or Haply Inverse 3

-1. **Set up a Docker environment with CUDA enabled (IGX only):**
-   ```bash
-   cd <path-to-i4h-workflows>
-   xhost +
-   workflows/telesurgery/docker/setup.sh run
-   # Inside Docker
-   workflows/telesurgery/docker/setup.sh init
-   ```
+#### Software Requirements
+- [NVIDIA Driver Version >= 570](https://developer.nvidia.com/cuda-downloads)
+- [CUDA Version >= 12.8](https://developer.nvidia.com/cuda-downloads)
+- Python 3.10
+- [RTI DDS License](https://www.rti.com/free-trial)
+- [Docker](https://docs.docker.com/engine/install/) 28.0.4+
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) 1.17.5+

-2. **Set up the x86 environment with CUDA enabled:**
-   ```bash
-   cd <path-to-i4h-workflows>
-   xhost +
-   workflows/telesurgery/docker/setup.sh init
-   ```
+### Common Setup

-3. **Create and activate a [conda](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions) environment:**
-   ```bash
-   source ~/miniconda3/bin/activate
-   conda create -n telesurgery python=3.10 -y
-   conda activate telesurgery
-   ```
+#### 1. RTI DDS License Setup
+```bash
+export RTI_LICENSE_FILE=<path-to-your-rti-license-file>
+# for example
+export RTI_LICENSE_FILE=/home/username/rti/rti_license.dat
+```

-4. **Run the setup script:**
-   ```bash
-   cd <path-to-i4h-workflows>
-   bash tools/env_setup_telesurgery.sh
-   ```
+> [!Note]
+> RTI DDS is the common communication package for all scripts.
> Please refer to the [DDS website](https://www.rti.com/products) for registration. You will need to obtain a license file and set the `RTI_LICENSE_FILE` environment variable to its path.

-> Make sure your public key is added to the github account if the git authentication fails.
+#### 2. Environment Configuration
+When running the Patient and the Surgeon applications on separate systems, export the following environment variables:

-### Obtain RTI DDS License
+```bash
+export PATIENT_IP="<patient IP address>"
+export SURGEON_IP="<surgeon IP address>"

-RTI DDS is the communication package used by all scripts. Please refer to the [DDS website](https://www.rti.com/products) for registration. You will need to obtain a license file and set the `RTI_LICENSE_FILE` environment variable to its path.
+
+# Export the following for NTP Server (Optional)
+export NTP_SERVER_HOST="<NTP server IP address>"
+export NTP_SERVER_PORT="123"
+```

-### NTP Server (Optional)
+> [!Note]
+> For NTP settings and variables, refer to the [NTP (Network Time Protocol) Server](#ntp-server-setup) section for additional details.

-An NTP (Network Time Protocol) server provides accurate time information to clients over a computer network. NTP is designed to synchronize the clocks of computers to a reference time source, ensuring all devices on the network maintain the same time.
+## Running the System
+
+### Real World Environment
+#### 1. Build Environment
+```bash
+cd <path-to-i4h-workflows>
+workflows/telesurgery/docker/real.sh build
+```

-# Run your own NTP server in the background
-docker run -d --name ntp-server --restart=always -p 123:123/udp cturra/ntp
+#### 2. Running Applications

-# Check if it's running
-docker logs ntp-server
+##### Patient Application
+```bash
+# Start the Docker Container
+workflows/telesurgery/docker/real.sh run

-# fix server ip in env.sh for NTP Server
-export NTP_SERVER_HOST=<NTP server IP address>
+# Using RealSense Camera with NVIDIA H.264 Encoder
+python patient/physical/camera.py --camera realsense --name room --width 1280 --height 720

-# To stop the server
-# docker stop ntp-server && docker rm ntp-server
-```

-### Environment Variables
+# Using CV2 Camera with NVIDIA H.264 Encoder
+python patient/physical/camera.py --camera cv2 --name robot --width 1920 --height 1080

-Before running any scripts, set up the following environment variables:
+# Using RealSense Camera with NVJPEG Encoder
+python patient/physical/camera.py --camera realsense --name room --width 1280 --height 720 --encoder nvjpeg

-1. **PYTHONPATH**: Set this to point to the **scripts** directory:
-   ```bash
-   export PYTHONPATH=<path-to-i4h-workflows>/workflows/telesurgery/scripts
-   ```
-   This ensures Python can find the modules under the [`scripts`](./scripts) directory.
+# Using CV2 Camera with NVJPEG Encoder
+python patient/physical/camera.py --camera cv2 --name robot --width 1920 --height 1080 --encoder nvjpeg
+```

-2. **RTI_LICENSE_FILE**: Set this to point to your RTI DDS license file:
-   ```bash
-   export RTI_LICENSE_FILE=<path-to-your-rti-license-file>
-   ```
-   This is required for the DDS communication package to function properly.
+##### Surgeon Application
+```bash
+# Start the Docker Container
+workflows/telesurgery/docker/real.sh run

-3. **NDDS_DISCOVERY_PEERS**: Set this to the IP address receiving camera data:
-   ```bash
-   export NDDS_DISCOVERY_PEERS="surgeon IP address"
-   ```
-More recommended variables can be found in [env.sh](./scripts/env.sh).
+# Start the Surgeon Application with NVIDIA H.264 Decoder
+python surgeon/camera.py --name [robot|room] --width 1280 --height 720 2> /dev/null

-## Running the Workflow
+# Run the Surgeon Application with NVJPEG Decoder
+python surgeon/camera.py --name [robot|room] --width 1280 --height 720 --decoder nvjpeg
+```

+##### Gamepad Controller Application
+```bash
-cd <path-to-i4h-workflows>/workflows/telesurgery/scripts
-source env.sh # Make sure all env variables are correctly set in env.sh
+# Start the Docker Container
+workflows/telesurgery/docker/real.sh run

-export PATIENT_IP=<patient IP address>
-export SURGEON_IP=<surgeon IP address>
+# Run the Gamepad Controller Application
+python surgeon/gamepad.py --api_host ${PATIENT_IP} --api_port 8081
```

-> Make sure the MIRA API Server is up and running (port: 8081) in the case of a physical world setup.

-### [Option 1] Patient in Physical World _(x86 / aarch64)_
-
-When running on IGX (aarch64), ensure you are in the Docker environment set up previously.
+### Simulation Environment
+#### 1. Build Environment
+```bash
-# Stream camera output
-python patient/physical/camera.py --camera realsense --name room --width 1280 --height 720
-python patient/physical/camera.py --camera cv2 --name robot --width 1920 --height 1080
+cd <path-to-i4h-workflows>
+workflows/telesurgery/docker/sim.sh build
```

-### [Option 2] Patient in Simulation World _(x86)_
+#### 2. Running Applications
+##### Patient Application
+```bash
-# Download the assets
-i4h-asset-retrieve
+# Start the Docker Container
+workflows/telesurgery/docker/sim.sh run

-python patient/simulation/main.py [--encoder nvc]
+# Start the Patient Application with NVIDIA H.264 Encoder
+python patient/simulation/main.py
+
+# Start the Patient Application with NVJPEG Encoder
+python patient/simulation/main.py --encoder nvjpeg
```

-### Surgeon Connecting to Patient _(x86 / aarch64)_
+##### Surgeon Application
+```bash
+# Start the Docker Container
+workflows/telesurgery/docker/sim.sh run

-# capture robot camera stream
-NDDS_DISCOVERY_PEERS=${PATIENT_IP} python surgeon/camera.py --name robot --width 1280 --height 720 [--decoder nvc]
+# Start the Surgeon Application with NVIDIA H.264 Decoder
+python surgeon/camera.py --name robot --width 1280 --height 720 2> /dev/null

-# capture room camera stream (optional)
-NDDS_DISCOVERY_PEERS=${PATIENT_IP} python surgeon/camera.py --name room --width 1280 --height 720 [--decoder nvc]
+# Run the Surgeon Application with NVJPEG Decoder
+python surgeon/camera.py --name robot --width 1280 --height 720 --decoder nvjpeg
+```
+
+##### Gamepad Controller Application
+```bash
+# Start the Docker Container
+workflows/telesurgery/docker/sim.sh run

-# Connect to gamepad controller and send commands to API Server
+# Run the Gamepad Controller Application
python surgeon/gamepad.py --api_host ${PATIENT_IP} --api_port 8081
```

-### Using H.264/HEVC Encoder/Decoder from NVIDIA Video Codec
+## Advanced Configuration
+
+### NTP Server Setup
+An NTP (Network Time Protocol) server provides accurate time information to clients over a computer network. NTP is designed to synchronize the clocks of computers to a reference time source, ensuring all devices on the network maintain the same time.
+
+```bash
+# Run your own NTP server in the background
+docker run -d --name ntp-server --restart=always -p 123:123/udp cturra/ntp

-Camera data can be streamed using either the H.264 or HEVC (H.265) codecs. To enable this for the Patient and Surgeon applications, use the `--encoder nvc` or `--decoder nvc` argument, respectively.
+# Check if it's running
+docker logs ntp-server

-Encoding parameters can be customized in the Patient application using the `--encoder_params` argument, as shown below:
+# Set the NTP server IP in env.sh
+export NTP_SERVER_HOST=<NTP server IP address>
+
+# To stop the server
+docker stop ntp-server && docker rm ntp-server
+```
+
+### Advanced NVIDIA Video Codec Configuration
+
+The applications stream H.264 by default using the NVIDIA Video Codec. Additional encoding parameters can be customized in the Patient application using the `--encoder_params` argument:

```bash
python patient/simulation/main.py --encoder nvc --encoder_params patient/nvc_encoder_params.json
```

-#### Sample Encoding Parameters for the NVIDIA Video Codec
+#### Sample Encoding Parameters

-Here’s an example of encoding parameters in JSON format:
+Here's an example of encoding parameters in JSON format:

```json
{
@@ -169,16 +202,55 @@ Here’s an example of encoding parameters in JSON format:
}
```

-> [!NOTE]
-> H.264 or HEVC (H.265) codecs are available on x86 platform only.
+### Advanced NVJPEG Configuration
+
+Adjust the quality of encoded frames using the NVJPEG encoder by editing the [nvjpeg_encoder_params.json](./scripts/patient/nvjpeg_encoder_params.json) file. Simply change the `quality` parameter to a value between 1 and 100:

-### Important Notes
-1. You may need to run multiple scripts simultaneously in different terminals or run in background (in case of docker)
-2. A typical setup requires multiple terminals running:
-   - Patient: Camera1, Camera2, Controller, etc.
-   - Surgeon: Camera1, Camera2, Controller, etc.
+```json
+{
+  "quality": 90
+}
+```

-If you encounter issues not covered in the notes above, please check the documentation for each component or open a new issue on GitHub.
+## Troubleshooting
+
+### Common Issues
+
+#### Docker Build Error
+**Q:** I get the following error when building the Docker image:
+```bash
+ERROR: invalid empty ssh agent socket: make sure SSH_AUTH_SOCK is set
+```
+
+**A:** Start the ssh-agent:
+```bash
+eval "$(ssh-agent -s)" && ssh-add
+```
+
+#### Unable to launch the applications when using NVIDIA Video Codec
+
+**Q:** I'm getting an error when I start the application with the NVIDIA Video Codec.
+
+```bash
+[error] [nv_video_encoder.cpp:101] Failed to create encoder: LoadNvEncApi : Current Driver Version does not support this NvEncodeAPI version, please upgrade driver at /workspace/holohub/build/nvidia_video_codec/_deps/nvc_sdk/NvEncoder/NvEncoder.cpp:82
+```
+
+**A:** NVIDIA Video Codec requires CUDA 12.8 (driver version 570) or later. Check out the [NVIDIA Video Codec System Requirements](https://developer.nvidia.com/nvidia-video-codec-sdk/download) section for more details.
+
+#### Update CUDA Driver on IGX
+```bash
+# ssh to igx-host to run the following commands
+sudo systemctl isolate multi-user
+
+sudo apt purge nvidia-kernel-*
+sudo add-apt-repository ppa:graphics-drivers/ppa
+sudo apt update
+
+sudo apt-get -y install linux-headers-nvidia-tegra aptitude
+sudo aptitude install nvidia-driver-570-open # Resolve any conflicts
+
+# hard reboot igx (soft reboot may not work)
+```

 ## Licensing

diff --git a/workflows/telesurgery/docker/Dockerfile b/workflows/telesurgery/docker/Dockerfile
index 124b2dd0..60e12edf 100644
--- a/workflows/telesurgery/docker/Dockerfile
+++ b/workflows/telesurgery/docker/Dockerfile
@@ -15,13 +15,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-FROM nvcr.io/nvidia/isaac-lab:2.0.2 +ARG BASE_IMAGE=nvcr.io/nvidia/clara-holoscan/holoscan:v3.2.0-dgpu +FROM $BASE_IMAGE WORKDIR /workspace - -# Fix livestream public endpoint address issue in 2.0.2/2.1.0 -RUN sed -i '/--\/app\/livestream\/publicEndpointAddress=/d' /workspace/isaaclab/source/isaaclab/isaaclab/app/app_launcher.py - # Install uv using curl for openpi RUN apt-get update && \ apt-get install -y software-properties-common && \ @@ -50,32 +47,19 @@ RUN apt-get update && \ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# Install CUDA 12.8 - -WORKDIR /tmp - -RUN apt-get update && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ - dpkg -i cuda-keyring_1.1-1_all.deb && \ - apt-get update && \ - apt-get -y install cuda-toolkit-12-8 - ENV PATH=/usr/local/cuda-12.8/bin${PATH:+:${PATH}} ENV LD_LIBRARY_PATH=/usr/local/cuda-12.8/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} -COPY workflows/telesurgery/scripts /workspace/telesurgery/scripts - -ENV PYTHON_EXECUTABLE=/workspace/isaaclab/_isaac_sim/python.sh - -# # Set up the Simulation -RUN --mount=type=ssh \ - $PYTHON_EXECUTABLE -m pip install --no-deps \ - git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@v0.2.0rc1 - -RUN mkdir -p /workspace/third_party - -RUN $PYTHON_EXECUTABLE -m pip install numpy - -RUN $PYTHON_EXECUTABLE -m pip install -r /workspace/telesurgery/scripts/requirements.txt +COPY workflows/telesurgery/requirements.txt /tmp/requirements.txt +RUN python3 -m pip install "numpy<2.0.0" \ + && python3 -m pip install --no-build-isolation pynvjpeg==0.0.13 \ + && python3 -m pip install -r /tmp/requirements.txt +RUN mkdir -p /root/rti +ENV PATH=/isaac-sim/kit/python/bin/:/usr/local/cuda-12.8/bin${PATH:+:${PATH}} +ENV LD_LIBRARY_PATH=/opt/nvidia/holoscan/lib/:/usr/local/cuda-12.8/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} +ENV LD_LIBRARY_PATH=/workspace/i4h-workflows/workflows/telesurgery/scripts/holohub/operators/nvidia_video_codec/libs:$LD_LIBRARY_PATH ENV RTI_LICENSE_FILE=/root/rti/rti_license.dat +ENV PYTHONPATH=/workspace/i4h-workflows/workflows/telesurgery/scripts + +WORKDIR /workspace/i4h-workflows/workflows/telesurgery/scripts diff --git a/workflows/telesurgery/docker/Dockerfile.sim b/workflows/telesurgery/docker/Dockerfile.sim new file mode 100644 index 00000000..65a61a73 --- /dev/null +++ b/workflows/telesurgery/docker/Dockerfile.sim @@ -0,0 +1,83 @@ +# syntax=docker/dockerfile:1.4 + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +FROM nvcr.io/nvidia/isaac-lab:2.1.0 + +# Fix livestream public endpoint address issue in 2.0.2/2.1.0 +RUN sed -i '/--\/app\/livestream\/publicEndpointAddress=/d' /workspace/isaaclab/source/isaaclab/isaaclab/app/app_launcher.py + +# Install uv using curl for openpi +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/test && \ + apt-get update && \ + apt-get install -y \ + curl \ + openssh-client \ + cmake \ + wget \ + build-essential \ + pybind11-dev \ + lsb-release \ + libglib2.0-0 \ + libdbus-1-3 \ + libopengl0 \ + libxcb-keysyms1 \ + libxcb-cursor0 \ + ninja-build \ + libgl1-mesa-dev \ + ffmpeg \ + v4l-utils \ + gcc-12 \ + g++-12 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ + mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts + +# Install Holoscan SDK +WORKDIR /tmp + +RUN apt-get update && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + apt-get update && \ + apt-get -y install holoscan=3.2.0.2-1 libnvjpeg-dev-12-6 + +ENV PYTHON_EXECUTABLE=/workspace/isaaclab/_isaac_sim/python.sh + +RUN update-alternatives --install /usr/bin/python python $PYTHON_EXECUTABLE 100 \ + && update-alternatives --install /usr/bin/python3 python3 $PYTHON_EXECUTABLE 100 + +# # Set up the Simulation +RUN --mount=type=ssh \ + $PYTHON_EXECUTABLE -m pip install --no-deps \ + git+ssh://git@github.com/isaac-for-healthcare/i4h-asset-catalog.git@main + +# Install Python dependencies +COPY workflows/telesurgery/requirements.txt /tmp/requirements.txt +RUN $PYTHON_EXECUTABLE -m pip install "numpy<2.0.0" \ + && $PYTHON_EXECUTABLE -m pip install --no-build-isolation pynvjpeg==0.0.13 \ + && $PYTHON_EXECUTABLE -m pip install -r /tmp/requirements.txt + +RUN mkdir -p /root/rti +ENV PATH=/isaac-sim/kit/python/bin/:/usr/local/cuda-12.8/bin${PATH:+:${PATH}} +ENV LD_LIBRARY_PATH=/opt/nvidia/holoscan/lib/:/usr/local/cuda-12.8/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} +ENV LD_LIBRARY_PATH=/workspace/i4h-workflows/workflows/telesurgery/scripts/holohub/operators/nvidia_video_codec/libs:$LD_LIBRARY_PATH +ENV RTI_LICENSE_FILE=/root/rti/rti_license.dat +ENV ISAAC_ASSET_SHA256_HASH=8e80faed126c533243f50bb01dca3dcf035e86b5bf567d622878866a8ef7f12d + +WORKDIR /workspace/i4h-workflows/workflows/telesurgery/scripts diff --git a/workflows/telesurgery/docker/real.sh b/workflows/telesurgery/docker/real.sh new file mode 100755 index 00000000..2e7394ad --- /dev/null +++ b/workflows/telesurgery/docker/real.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/" >/dev/null 2>&1 && pwd)" + +# Source environment variables at script level +source $SCRIPT_DIR/../scripts/env.sh + +# Source common utilities +source $SCRIPT_DIR/utils.sh + +HOLOHUB_DIR=$SCRIPT_DIR/../scripts/holohub +DOCKER_IMAGE=telesurgery:0.2 +CONTAINER_NAME=telesurgery + +function build() { + BASE_IMAGE=nvcr.io/nvidia/clara-holoscan/holoscan:v3.2.0-$(get_host_gpu) + echo "Building Telesurgery Docker Image using ${BASE_IMAGE}" + docker build --ssh default \ + --build-arg BASE_IMAGE=$BASE_IMAGE \ + -t $DOCKER_IMAGE \ + -f workflows/telesurgery/docker/Dockerfile . + download_operators +} + +function run() { + if [ ! -f "$RTI_LICENSE_FILE" ]; then + echo "RTI_LICENSE_FILE is not set or does not exist" + exit 1 + fi + + xhost + + + local OTHER_ARGS="" + if [ ! -z "${NTP_SERVER_HOST}" ] && [ ! -z "${NTP_SERVER_PORT}" ]; then + OTHER_ARGS="-e NTP_SERVER_HOST=${NTP_SERVER_HOST} -e NTP_SERVER_PORT=${NTP_SERVER_PORT}" + fi + + docker run --rm -ti \ + --runtime=nvidia \ + --gpus all \ + --entrypoint "/bin/bash" \ + --ipc=host \ + --network=host \ + --privileged \ + --volume $SSH_AUTH_SOCK:/ssh-agent \ + -e ACCEPT_EULA=Y \ + -e PRIVACY_CONSENT=Y \ + -e DISPLAY \ + -e XDG_RUNTIME_DIR \ + -e XDG_SESSION_TYPE \ + -e SSH_AUTH_SOCK=/ssh-agent \ + -e PATIENT_IP="$PATIENT_IP" \ + -e SURGEON_IP="$SURGEON_IP" \ + -v /tmp/.X11-unix:/tmp/.X11-unix \ + -v $HOME/.Xauthority:/root/.Xauthority \ + -v /dev:/dev \ + -v $(pwd):/workspace/i4h-workflows \ + -v ${RTI_LICENSE_FILE}:/root/rti/rti_license.dat \ + $(for dev in /dev/video*; do echo --device=$dev; done) \ + $OTHER_ARGS \ + $DOCKER_IMAGE \ + -c "source /workspace/i4h-workflows/workflows/telesurgery/scripts/env.sh && /workspace/i4h-workflows/workflows/telesurgery/docker/real.sh init && exec bash" +} + +function init() { + echo "Initializing Telesurgery environment ..." + echo " Patient IP: ${PATIENT_IP}" + echo " Surgeon IP: ${SURGEON_IP}" + echo " DDS Discovery IP: ${NDDS_DISCOVERY_PEERS}" +} + +$@ diff --git a/workflows/telesurgery/docker/setup.sh b/workflows/telesurgery/docker/setup.sh deleted file mode 100755 index 16a30d21..00000000 --- a/workflows/telesurgery/docker/setup.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -function run() { - docker run --gpus all --rm -ti \ - --ipc=host \ - --net=host \ - $(for dev in /dev/video*; do echo --device=$dev; done) \ - --env DISPLAY=$DISPLAY \ - --volume /tmp/.X11-unix:/tmp/.X11-unix \ - --privileged \ - -v /dev:/dev \ - -v $(pwd):/workspace/i4h-workflows \ - -w /workspace/i4h-workflows \ - nvcr.io/nvidia/clara-holoscan/holoscan:v3.2.0-dgpu \ - bash -} - -function init() { - rm -rf .venv uv.lock - apt update - apt install v4l-utils ffmpeg -y - - v4l2-ctl --list-devices - - # Install Miniconda - mkdir -p ~/miniconda3 - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-`uname -m`.sh -O ~/miniconda3/miniconda.sh - bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 - rm ~/miniconda3/miniconda.sh -} - -$@ diff --git a/workflows/telesurgery/docker/sim.sh b/workflows/telesurgery/docker/sim.sh new file mode 100755 index 00000000..6bd7e5f8 --- /dev/null +++ b/workflows/telesurgery/docker/sim.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/" >/dev/null 2>&1 && pwd)" + +# Source environment variables at script level +source $SCRIPT_DIR/../scripts/env.sh + +# Source common utilities +source $SCRIPT_DIR/utils.sh + +HOLOHUB_DIR=$SCRIPT_DIR/../scripts/holohub +DOCKER_IMAGE=telesurgery:0.2 +CONTAINER_NAME=telesurgery-sim + +function build() { + if [ -L $SCRIPT_DIR/../../../third_party/IssacLab ]; then + rm $SCRIPT_DIR/../../../third_party/IssacLab + fi + docker build --ssh default -t $DOCKER_IMAGE -f workflows/telesurgery/docker/Dockerfile.sim . + download_operators +} + +function run() { + if [ ! -f "$RTI_LICENSE_FILE" ]; then + echo "RTI_LICENSE_FILE is not set or does not exist" + exit 1 + fi + + xhost + + + local OTHER_ARGS="" + if [ ! -z "${NTP_SERVER_HOST}" ] && [ ! 
-z "${NTP_SERVER_PORT}" ]; then + OTHER_ARGS="-e NTP_SERVER_HOST=${NTP_SERVER_HOST} -e NTP_SERVER_PORT=${NTP_SERVER_PORT}" + fi + + docker run --rm -ti \ + --runtime=nvidia \ + --gpus all \ + --entrypoint "/bin/bash" \ + --ipc=host \ + --network=host \ + --privileged \ + --volume $SSH_AUTH_SOCK:/ssh-agent \ + -e ACCEPT_EULA=Y \ + -e PRIVACY_CONSENT=Y \ + -e DISPLAY \ + -e XDG_RUNTIME_DIR \ + -e XDG_SESSION_TYPE \ + -e SSH_AUTH_SOCK=/ssh-agent \ + -e PATIENT_IP="$PATIENT_IP" \ + -e SURGEON_IP="$SURGEON_IP" \ + $OTHER_ARGS \ + -v /tmp/.X11-unix:/tmp/.X11-unix \ + -v $HOME/.Xauthority:/root/.Xauthority \ + -v /dev:/dev \ + -v ~/docker/telesurgery/cache/computecache:/root/.nv/ComputeCache:rw \ + -v ~/docker/telesurgery/cache/glcache:/root/.cache/nvidia/GLCache:rw \ + -v ~/docker/telesurgery/cache/i4h-assets:/root/.cache/i4h-assets:rw \ + -v ~/docker/telesurgery/cache/kit:/isaac-sim/kit/cache:rw \ + -v ~/docker/telesurgery/cache/ov:/root/.cache/ov:rw \ + -v ~/docker/telesurgery/cache/pip:/root/.cache/pip:rw \ + -v ~/docker/telesurgery/data:/root/.local/share/ov/data:rw \ + -v ~/docker/telesurgery/documents:/root/Documents:rw \ + -v ~/docker/telesurgery/logs:/root/.nvidia-omniverse/logs:rw \ + -v $(pwd):/workspace/i4h-workflows \ + -v ${RTI_LICENSE_FILE}:/root/rti/rti_license.dat \ + $(for dev in /dev/video*; do echo --device=$dev; done) \ + $OTHER_ARGS \ + $DOCKER_IMAGE \ + -c "source /workspace/i4h-workflows/workflows/telesurgery/scripts/env.sh && /workspace/i4h-workflows/workflows/telesurgery/docker/sim.sh init && exec bash" +} + +function enter() { + docker exec -it $CONTAINER_NAME /bin/bash +} + +function init() { + echo "Initializing Telesurgery environment ..." + mkdir -p $SCRIPT_DIR/../../../third_party + ln -s /workspace/isaaclab /workspace/i4h-workflows/third_party/IssacLab + + if [ ! -d "/root/.cache/i4h-assets/$ISAAC_ASSET_SHA256_HASH" ]; then + echo "Please wait while downloading i4h-assets (Props)..." + yes Yes | i4h-asset-retrieve --sub-path Props | grep -v "Skipping download" + echo "Please wait while downloading i4h-assets (Robots)..." + yes Yes | i4h-asset-retrieve --sub-path Robots | grep -v "Skipping download" + fi + echo " Patient IP: ${PATIENT_IP}" + echo " Surgeon IP: ${SURGEON_IP}" + echo " DDS Discovery IP: ${NDDS_DISCOVERY_PEERS}" +} + +$@ diff --git a/workflows/telesurgery/docker/utils.sh b/workflows/telesurgery/docker/utils.sh new file mode 100755 index 00000000..6f584d4b --- /dev/null +++ b/workflows/telesurgery/docker/utils.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Common utility functions for telesurgery workflow
+
+function download_operators() {
+    # HOLOHUB_DIR must be set by the calling script
+    if [ -z "$HOLOHUB_DIR" ]; then
+        echo "Error: HOLOHUB_DIR is not set"
+        return 1
+    fi
+
+    local FILE_NAME=holohub_nv_video_codec_operators_0.1.zip
+    VIDEO_CODEC_FILENAME=/tmp/${FILE_NAME}
+    UNZIP_DIR=/tmp/${FILE_NAME%%.*}
+
+    if [ ! -d "$HOLOHUB_DIR/operators/nvidia_video_codec/libs" ]; then
+        echo "Downloading NVIDIA Video Codec Operators"
+        curl -L -o $VIDEO_CODEC_FILENAME "https://edge.urm.nvidia.com:443/artifactory/sw-holoscan-cli-generic/holohub/operators/$FILE_NAME"
+        unzip -o -q $VIDEO_CODEC_FILENAME -d $UNZIP_DIR
+        rm $VIDEO_CODEC_FILENAME
+        mv $UNZIP_DIR/$(uname -p)/* $HOLOHUB_DIR/operators/nvidia_video_codec
+        rm -rf $UNZIP_DIR
+    fi
+}
+
+get_host_gpu() {
+    if ! command -v nvidia-smi >/dev/null; then
+        echo "Could not find any GPU drivers on host. Defaulting build to target dGPU/CPU stack." >&2
+        echo -n "dgpu";
+    elif [[ ! $(nvidia-smi --query-gpu=name --format=csv,noheader) ]] || \
+        [[ $(nvidia-smi --query-gpu=name --format=csv,noheader -i 0) =~ "Orin (nvgpu)" ]]; then
+        echo -n "igpu";
+    else
+        echo -n "dgpu";
+    fi
+}
diff --git a/workflows/telesurgery/requirements.txt b/workflows/telesurgery/requirements.txt
index 141b9fb4..edf16c81 100644
--- a/workflows/telesurgery/requirements.txt
+++ b/workflows/telesurgery/requirements.txt
@@ -1,22 +1,22 @@
-rti.connext
-numpy<2.0.0
-pydantic
-pillow
-opencv-python-headless
-pyrealsense2
+boto3
+botocore
 holoscan==3.2.0
 ntplib
+numpy<2.0.0
+opencv-python-headless
+pillow
+pydantic
 pygame
-websockets
+pyrealsense2
+requests
+rti.connext
+setuptools
 websocket-client
 websocket-server
-setuptools
-requests
+websockets
 --extra-index-url https://download.pytorch.org/whl/cu124
 torch; platform_machine == 'x86_64'
 --extra-index-url https://pypi.nvidia.com
-isaacsim[allextscache]==4.5.0; platform_machine == 'x86_64'
-
-pynvjpeg
+isaacsim[all,extscache]==4.5.0; platform_machine == 'x86_64'
diff --git a/workflows/telesurgery/scripts/dds/README.md b/workflows/telesurgery/scripts/dds/README.md
index aba6385e..446e09b3 100644
--- a/workflows/telesurgery/scripts/dds/README.md
+++ b/workflows/telesurgery/scripts/dds/README.md
@@ -24,6 +24,6 @@ For comprehensive troubleshooting guidance, refer to the [RTI Connext DDS Gettin
 ```bash
 export PATH=$PATH:/opt/rti.com/rti_connext_dds-7.5.0/bin
-rtiddsping -domainId 779 -publisher
-rtiddsping -domainId 779 -subscriber -peer <peer host IP>
+rtiddsping -domainId 9 -publisher -peer <peer host IP>
+rtiddsping -domainId 9 -subscriber -peer <peer host IP>
 ```
diff --git a/workflows/telesurgery/scripts/env.sh b/workflows/telesurgery/scripts/env.sh
index eb907df4..96335f7f 100755
--- a/workflows/telesurgery/scripts/env.sh
+++ b/workflows/telesurgery/scripts/env.sh
@@ -15,35 +15,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/" >/dev/null 2>&1 && pwd)" +ENV_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/" >/dev/null 2>&1 && pwd)" + # Host IP for Patient/Surgeon -export PATIENT_IP="patient IP address" -export SURGEON_IP="surgeon IP address" +export PATIENT_IP="${PATIENT_IP:-127.0.0.1}" +export SURGEON_IP="${SURGEON_IP:-127.0.0.1}" -# RTI QOS Profile -export NDDS_QOS_PROFILES=$SCRIPT_DIR/dds/qos_profile.xml +export NDDS_DISCOVERY_PEERS="${PATIENT_IP},${SURGEON_IP}" -# RTI Discovery Address -export NDDS_DISCOVERY_PEERS=$PATIENT_IP +# RTI QOS Profile +export NDDS_QOS_PROFILES=$ENV_SCRIPT_DIR/dds/qos_profile.xml # RTI License if [ -z "${RTI_LICENSE_FILE}" ]; then - export RTI_LICENSE_FILE=$SCRIPT_DIR/dds/rti_license.dat + export RTI_LICENSE_FILE=$ENV_SCRIPT_DIR/dds/rti_license.dat fi # Python Path -export PYTHONPATH=$SCRIPT_DIR -export LD_LIBRARY_PATH=$SCRIPT_DIR/holohub/operators/nvidia_video_codec/lib:$LD_LIBRARY_PATH +export PYTHONPATH=$ENV_SCRIPT_DIR:$PYTHONPATH +export LD_LIBRARY_PATH=$ENV_SCRIPT_DIR/holohub/operators/nvidia_video_codec/lib:$LD_LIBRARY_PATH # Optional: NTP Server to capture time diff between 2 nodes -# export NTP_SERVER_HOST="ntp server address" -# export NTP_SERVER_PORT=123 - -VIDEO_CODEC_FILE=/tmp/nv_video_codec.zip -if [ ! -d "$SCRIPT_DIR/holohub/operators/nvidia_video_codec/lib" ]; then - echo "Downloading NVIDIA Video Codec Operators" - curl -L -o $VIDEO_CODEC_FILE 'https://edge.urm.nvidia.com:443/artifactory/sw-holoscan-cli-generic/holohub/operators/nv_video_codec_0.2.zip' - unzip -o $VIDEO_CODEC_FILE -d $SCRIPT_DIR/holohub/operators/nvidia_video_codec - rm $VIDEO_CODEC_FILE +if [ ! -z "${NTP_SERVER_HOST}" ] && [ ! -z "${NTP_SERVER_PORT}" ]; then + export NTP_SERVER_HOST="${NTP_SERVER_HOST}" + export NTP_SERVER_PORT="${NTP_SERVER_PORT}" fi diff --git a/workflows/telesurgery/scripts/holohub/operators/camera/cv2.py b/workflows/telesurgery/scripts/holohub/operators/camera/cv2.py index 3a39fb5b..e7ee3e53 100644 --- a/workflows/telesurgery/scripts/holohub/operators/camera/cv2.py +++ b/workflows/telesurgery/scripts/holohub/operators/camera/cv2.py @@ -46,7 +46,7 @@ def __init__(self, fragment, width: int, height: int, device_idx: int, framerate self.framerate = framerate self.stream_type = 2 # color - self.stream_format = 5 # rgb + self.stream_format = 7 # rgba self.ntp_offset_time = get_ntp_offset() super().__init__(fragment, *args, **kwargs) @@ -72,7 +72,7 @@ def compute(self, op_input, op_output, context): if not ret: raise RuntimeError("Failed to capture frame from camera") - data = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + data = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA) ts = int((time.time() + self.ntp_offset_time) * 1000) stream = CameraStream( diff --git a/workflows/telesurgery/scripts/holohub/operators/camera/realsense.py b/workflows/telesurgery/scripts/holohub/operators/camera/realsense.py index bf535289..6ed9643c 100644 --- a/workflows/telesurgery/scripts/holohub/operators/camera/realsense.py +++ b/workflows/telesurgery/scripts/holohub/operators/camera/realsense.py @@ -63,7 +63,7 @@ def __init__( if stream_format else rs.format.z16 if stream_type == "depth" - else rs.format.rgb8 + else rs.format.rgba8 ) self.ntp_offset_time = get_ntp_offset() @@ -79,9 +79,13 @@ def stop(self): def start(self): config = rs.config() context = rs.context() + try: - for device in context.query_devices(): - print(f"(RealSense): Available device: {device}") + for i, device in enumerate(context.query_devices()): + try: + print(f"(RealSense): {i}: Available 
device: {device}") + except Exception: + print(f"(RealSense): Failed to query device {i} (Ignoring)") except Exception: print("(RealSense): FAILED TO QUERY DEVICES (Ignoring)") diff --git a/workflows/telesurgery/scripts/holohub/operators/camera/sim.py b/workflows/telesurgery/scripts/holohub/operators/camera/sim.py index 57430bff..e9a086dd 100644 --- a/workflows/telesurgery/scripts/holohub/operators/camera/sim.py +++ b/workflows/telesurgery/scripts/holohub/operators/camera/sim.py @@ -41,7 +41,7 @@ def __init__(self, fragment, width: int, height: int, *args, **kwargs): self.width = width self.height = height self.stream_type = 2 # color - self.stream_format = 5 # rgb + self.stream_format = 7 # rgba self.ntp_offset_time = get_ntp_offset() self.q: queue.Queue = queue.Queue() diff --git a/workflows/telesurgery/scripts/holohub/operators/dds/subscriber.py b/workflows/telesurgery/scripts/holohub/operators/dds/subscriber.py index 6197c329..ffca1518 100644 --- a/workflows/telesurgery/scripts/holohub/operators/dds/subscriber.py +++ b/workflows/telesurgery/scripts/holohub/operators/dds/subscriber.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + import rti.connextdds as dds from holoscan.core import Operator from holoscan.core._core import OperatorSpec @@ -43,6 +45,7 @@ def start(self): dp = dds.DomainParticipant(domain_id=self.dds_domain_id) self.dds_reader = dds.DataReader(dds.Topic(dp, self.dds_topic, self.dds_topic_class)) # self.selector = self.dds_reader.select().max_samples(1) + print(f"NDDS_DISCOVERY_PEERS: {os.environ.get('NDDS_DISCOVERY_PEERS')}") print(f"Reading data from DDS: {self.dds_topic}:{self.dds_domain_id} => {self.dds_topic_class.__name__}") def compute(self, op_input, op_output, context): diff --git a/workflows/telesurgery/scripts/holohub/operators/mira/api_client.py b/workflows/telesurgery/scripts/holohub/operators/mira/api_client.py index c9f07253..1c9afc04 100644 --- a/workflows/telesurgery/scripts/holohub/operators/mira/api_client.py +++ b/workflows/telesurgery/scripts/holohub/operators/mira/api_client.py @@ -51,11 +51,15 @@ def listener(self): print(f"Received message: {json_message}") except Exception as e: print(f"Error in message listener: {e}") + self.cleanup() + raise e def compute(self, op_input, op_output, context): try: message = op_input.receive("input") message["id"] = self.rpc_id + if self.ws is None: + self.start() self.ws.send(json.dumps(message)) self.rpc_id += 1 diff --git a/workflows/telesurgery/scripts/holohub/operators/mira/gamepad/controller.py b/workflows/telesurgery/scripts/holohub/operators/mira/gamepad/controller.py index fcbffb41..07efc79e 100644 --- a/workflows/telesurgery/scripts/holohub/operators/mira/gamepad/controller.py +++ b/workflows/telesurgery/scripts/holohub/operators/mira/gamepad/controller.py @@ -37,7 +37,7 @@ class GamepadControllerOp(Operator): - def __init__(self, fragment, *args, control_mode: ControlMode = ControlMode.POLAR, **kwargs): + def __init__(self, fragment, *args, control_mode: ControlMode = ControlMode.CARTESIAN, **kwargs): self.control_mode = control_mode self.reset_grips = False self.west_east_pressed_time: Any = None @@ -83,7 +83,7 @@ def on_event(self, event: GamepadEvent) -> None: self.rpc("set_lightring", LIGHT_RING_SOLID_RED.to_dict()) return - is_xbox = "xbox" in event.name.lower() + is_xbox = "xbox" in event.name.lower() or "x-box" in event.name.lower() if event.os == "posix": map = LINUX_XBOX_MAP if is_xbox else 
LINUX_PRO_CONTROLLER_MAP else: diff --git a/workflows/telesurgery/scripts/holohub/operators/nvidia_video_codec/.gitignore b/workflows/telesurgery/scripts/holohub/operators/nvidia_video_codec/.gitignore index 9036cd34..28f8e6d5 100644 --- a/workflows/telesurgery/scripts/holohub/operators/nvidia_video_codec/.gitignore +++ b/workflows/telesurgery/scripts/holohub/operators/nvidia_video_codec/.gitignore @@ -1,3 +1,3 @@ nv_video_decoder/ nv_video_encoder/ -lib/ +libs/ diff --git a/workflows/telesurgery/scripts/holohub/operators/nvjpeg/encoder.py b/workflows/telesurgery/scripts/holohub/operators/nvjpeg/encoder.py index 72851e7a..8d4a4f5c 100644 --- a/workflows/telesurgery/scripts/holohub/operators/nvjpeg/encoder.py +++ b/workflows/telesurgery/scripts/holohub/operators/nvjpeg/encoder.py @@ -58,7 +58,7 @@ def compute(self, op_input, op_output, context): stream.compress_ratio = 1 else: original_len = np.prod(stream.data.shape) if isinstance(stream.data, np.ndarray) else len(stream.data) - stream.data = self.nvjpeg.encode(stream.data, self.quality) + stream.data = self.nvjpeg.encode(stream.data[:, :, :3], self.quality) stream.compress_ratio = original_len / len(stream.data) stream.encode_latency = (time.time() - start) * 1000 diff --git a/workflows/telesurgery/scripts/holohub/operators/stats.py b/workflows/telesurgery/scripts/holohub/operators/stats.py index fef24bda..125228be 100644 --- a/workflows/telesurgery/scripts/holohub/operators/stats.py +++ b/workflows/telesurgery/scripts/holohub/operators/stats.py @@ -62,7 +62,7 @@ def compute(self, op_input, op_output, context): e1 = np.mean(self.time_encode) d1 = np.mean(self.time_decode) c1 = np.mean(self.compress_ratio) - s1 = stream.width * stream.height * (3 if stream.format == 5 else 1) + s1 = stream.width * stream.height * (4 if stream.type == 2 else 1) # Print the results print( diff --git a/workflows/telesurgery/scripts/patient/nvjpeg_encoder_params.json b/workflows/telesurgery/scripts/patient/nvjpeg_encoder_params.json new file mode 100644 index 00000000..7690d376 --- /dev/null +++ b/workflows/telesurgery/scripts/patient/nvjpeg_encoder_params.json @@ -0,0 +1,3 @@ +{ + "quality": 90 +} diff --git a/workflows/telesurgery/scripts/patient/physical/camera.py b/workflows/telesurgery/scripts/patient/physical/camera.py index dbb5288e..b2d23a22 100644 --- a/workflows/telesurgery/scripts/patient/physical/camera.py +++ b/workflows/telesurgery/scripts/patient/physical/camera.py @@ -15,6 +15,7 @@ import argparse import json +import os from holohub.operators.camera.cv2 import CV2VideoCaptureOp from holohub.operators.camera.realsense import RealsenseOp @@ -22,7 +23,6 @@ from holohub.operators.nvidia_video_codec.utils.camera_stream_merge import CameraStreamMergeOp from holohub.operators.nvidia_video_codec.utils.camera_stream_split import CameraStreamSplitOp from holohub.operators.nvjpeg.encoder import NVJpegEncoderOp -from holohub.operators.sink import NoOp from holoscan.core import Application from holoscan.resources import BlockMemoryPool, MemoryStorageType from schemas.camera_stream import CameraStream @@ -92,7 +92,6 @@ def compose(self): self, name="nvc_encoder", cuda_device_ordinal=0, - copy_to_host=False, width=self.width, height=self.height, codec=self.encoder_params.get("codec", "H264"), @@ -104,7 +103,7 @@ def compose(self): allocator=BlockMemoryPool( self, name="pool", - storage_type=MemoryStorageType.DEVICE, + storage_type=MemoryStorageType.HOST, block_size=self.width * self.height * 3 * 4, num_blocks=2, ), @@ -130,8 +129,6 @@ def compose(self): 
dds_topic_class=CameraStream,
         )

-        sink = NoOp(self)
-
         if self.encoder == "nvc":
             print("Using NVC encoder with split and merge")
             self.add_flow(source, split_op, {("output", "input")})
@@ -139,11 +136,9 @@
             self.add_flow(split_op, merge_op, {("metadata", "metadata")})
             self.add_flow(encoder_op, merge_op, {("output", "camera")})
             self.add_flow(merge_op, dds, {("output", "input")})
-            self.add_flow(dds, sink, {("output", "input")})
         else:
             self.add_flow(source, encoder_op, {("output", "input")})
             self.add_flow(encoder_op, dds, {("output", "input")})
-            self.add_flow(dds, sink, {("output", "input")})


 def main():
@@ -157,12 +152,27 @@ def main():
     parser.add_argument("--framerate", type=int, default=30, help="frame rate")
     parser.add_argument("--stream_type", type=str, default="color", choices=["color", "depth"])
     parser.add_argument("--stream_format", type=str, default="")
-    parser.add_argument("--encoder", type=str, choices=["nvjpeg", "nvc", "none"], default="nvjpeg")
-    parser.add_argument("--encoder_params", type=str, default=json.dumps({"quality": 90}), help="encoder params")
-    parser.add_argument("--domain_id", type=int, default=779, help="dds domain id")
+    parser.add_argument("--encoder", type=str, choices=["nvjpeg", "nvc", "none"], default="nvc", help="encoder type")
+    parser.add_argument("--encoder_params", type=str, default=None, help="encoder params")
+    parser.add_argument("--domain_id", type=int, default=9, help="dds domain id")
     parser.add_argument("--topic", type=str, default="", help="dds topic name")

     args = parser.parse_args()
+
+    # Resolve encoder params: a per-encoder default JSON file, a user-supplied
+    # file path, or an inline JSON string.
+    if args.encoder == "nvjpeg" and args.encoder_params is None:
+        encoder_params = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nvjpeg_encoder_params.json")
+    elif args.encoder == "nvc" and args.encoder_params is None:
+        encoder_params = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nvc_encoder_params.json")
+    elif args.encoder_params and os.path.isfile(args.encoder_params):
+        encoder_params = args.encoder_params
+    else:
+        encoder_params = json.loads(args.encoder_params) if args.encoder_params else {}
+
+    print(f"Encoder params: {encoder_params}")
+    # Guard the isfile() check: encoder_params may already be a dict at this point.
+    if isinstance(encoder_params, str) and os.path.isfile(encoder_params):
+        with open(encoder_params) as f:
+            encoder_params = json.load(f)
+
     app = App(
         camera=args.camera,
@@ -173,7 +183,7 @@ def main():
         stream_type=args.stream_type,
         stream_format=args.stream_format,
         encoder=args.encoder,
-        encoder_params=json.loads(args.encoder_params) if args.encoder_params else {},
+        encoder_params=encoder_params,
         dds_domain_id=args.domain_id,
         dds_topic=args.topic if args.topic else f"telesurgery/{args.name}_camera/rgb",
     )
diff --git a/workflows/telesurgery/scripts/patient/simulation/camera/app.py b/workflows/telesurgery/scripts/patient/simulation/camera/app.py
index 9ae1292c..323d0c6e 100644
--- a/workflows/telesurgery/scripts/patient/simulation/camera/app.py
+++ b/workflows/telesurgery/scripts/patient/simulation/camera/app.py
@@ -68,7 +68,6 @@ def compose(self):
             self,
             name="nvc_encoder",
             cuda_device_ordinal=0,
-            copy_to_host=False,
             width=self.width,
             height=self.height,
             codec=self.encoder_params.get("codec", "H264"),
@@ -80,7 +79,7 @@ def compose(self):
             allocator=BlockMemoryPool(
                 self,
                 name="pool",
-                storage_type=MemoryStorageType.DEVICE,
+                storage_type=MemoryStorageType.HOST,
                 block_size=self.width * self.height * 3 * 4,
                 num_blocks=2,
             ),
diff --git a/workflows/telesurgery/scripts/patient/simulation/camera/sensor.py b/workflows/telesurgery/scripts/patient/simulation/camera/sensor.py
index 2cf401c1..5d97d7ab 100644
--- a/workflows/telesurgery/scripts/patient/simulation/camera/sensor.py
a/workflows/telesurgery/scripts/patient/simulation/camera/sensor.py +++ b/workflows/telesurgery/scripts/patient/simulation/camera/sensor.py @@ -18,10 +18,9 @@ class CameraEx(Camera): - def __init__(self, channels=3, *args, **kwargs): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.channels = channels self.prev_rendering_frame = -1 self.frame_counter = 0 self.callback = None @@ -37,6 +36,5 @@ def _data_acquisition_callback(self, event: carb.events.IEvent): rgba = self._current_frame["rgba"] if not rgba.shape[0] == 0 and self.callback is not None: - rgb = rgba[:, :, : self.channels] - self.callback(rgb, self.frame_counter) + self.callback(rgba, self.frame_counter) self.frame_counter += 1 diff --git a/workflows/telesurgery/scripts/patient/simulation/main.py b/workflows/telesurgery/scripts/patient/simulation/main.py index 35b5a4b7..d477235f 100644 --- a/workflows/telesurgery/scripts/patient/simulation/main.py +++ b/workflows/telesurgery/scripts/patient/simulation/main.py @@ -33,13 +33,13 @@ def main(): parser.add_argument("--width", type=int, default=1920, help="width") parser.add_argument("--height", type=int, default=1080, help="height") parser.add_argument("--framerate", type=int, default=30, help="frame rate") - parser.add_argument("--encoder", type=str, choices=["nvjpeg", "nvc", "none"], default="nvjpeg") - parser.add_argument("--encoder_params", type=str, default=json.dumps({"quality": 90}), help="encoder params") - parser.add_argument("--domain_id", type=int, default=779, help="dds domain id") + parser.add_argument("--encoder", type=str, choices=["nvjpeg", "nvc", "none"], default="nvc", help="encoder type") + parser.add_argument("--encoder_params", type=str, default=None, help="encoder params") + parser.add_argument("--domain_id", type=int, default=9, help="dds domain id") parser.add_argument("--topic", type=str, default="", help="dds topic name") parser.add_argument("--api_host", type=str, default="0.0.0.0", help="local api server host") parser.add_argument("--api_port", type=int, default=8081, help="local api server port") - parser.add_argument("--timeline_play", type=bool, default=False, help="play the timeline") + parser.add_argument("--timeline_play", type=bool, default=True, help="play the timeline") args = parser.parse_args() app_launcher = AppLauncher(headless=False) @@ -105,7 +105,6 @@ def on_gamepad_event(message): from patient.simulation.camera.sensor import CameraEx camera = CameraEx( - channels=4 if args.encoder == "nvc" else 3, prim_path=camera_prim_path, frequency=args.framerate, resolution=(args.width, args.height), @@ -115,12 +114,21 @@ def on_gamepad_event(message): # holoscan app in async mode to consume camera source from patient.simulation.camera.app import App as CameraApp - if os.path.isfile(args.encoder_params): - with open(args.encoder_params, "r") as f: - encoder_params = json.load(f) + if args.encoder == "nvjpeg" and args.encoder_params is None: + encoder_params = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nvjpeg_encoder_params.json") + elif args.encoder == "nvc" and args.encoder_params is None: + encoder_params = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nvc_encoder_params.json") + elif os.path.isfile(args.encoder_params): + encoder_params = args.encoder_params else: encoder_params = json.loads(args.encoder_params) if args.encoder_params else {} + print(f"Encoder params: {encoder_params}") + + if os.path.isfile(encoder_params): + with open(encoder_params) as f: + encoder_params = json.load(f) + 
camera_app = CameraApp( width=args.width, height=args.height, diff --git a/workflows/telesurgery/scripts/surgeon/camera.py b/workflows/telesurgery/scripts/surgeon/camera.py index 847e3072..78af52c3 100644 --- a/workflows/telesurgery/scripts/surgeon/camera.py +++ b/workflows/telesurgery/scripts/surgeon/camera.py @@ -98,8 +98,8 @@ def main(): parser.add_argument("--name", type=str, default="robot", help="camera name") parser.add_argument("--width", type=int, default=1920, help="width") parser.add_argument("--height", type=int, default=1080, help="height") - parser.add_argument("--decoder", type=str, choices=["nvjpeg", "none", "nvc"], default="nvjpeg") - parser.add_argument("--domain_id", type=int, default=779, help="dds domain id") + parser.add_argument("--decoder", type=str, choices=["nvjpeg", "none", "nvc"], default="nvc", help="decoder type") + parser.add_argument("--domain_id", type=int, default=9, help="dds domain id") parser.add_argument("--topic", type=str, default="", help="dds topic name") args = parser.parse_args() diff --git a/workflows/telesurgery/tests/README.md b/workflows/telesurgery/tests/README.md new file mode 100644 index 00000000..3a76c4f8 --- /dev/null +++ b/workflows/telesurgery/tests/README.md @@ -0,0 +1,10 @@ +# Telesurgery Unit Test + +Use the following command to setup a new virtual environment and run the tests: + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -r workflows/telesurgery/tests/requirements.txt +./workflows/telesurgery/tests/run.sh +``` diff --git a/workflows/telesurgery/tests/requirements.txt b/workflows/telesurgery/tests/requirements.txt new file mode 100644 index 00000000..b4c97ccd --- /dev/null +++ b/workflows/telesurgery/tests/requirements.txt @@ -0,0 +1,5 @@ +pytest +holoscan==3.2.0 +numpy<2.0.0 +pynvjpeg +vgamepad diff --git a/workflows/telesurgery/tests/run.sh b/workflows/telesurgery/tests/run.sh new file mode 100755 index 00000000..e291115e --- /dev/null +++ b/workflows/telesurgery/tests/run.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/" >/dev/null 2>&1 && pwd)" + +# Convert relative paths to absolute paths +SCRIPTS_DIR="$(cd "$SCRIPT_DIR/../scripts" && pwd)" + +PYTHONPATH="$SCRIPTS_DIR:$SCRIPT_DIR:$PYTHONPATH" +export PYTHONPATH +echo "PYTHONPATH: $PYTHONPATH" +echo "Running tests..." +# Run the test +python -m unittest discover -s $SCRIPT_DIR $@ diff --git a/workflows/telesurgery/tests/test_camera/__init__.py b/workflows/telesurgery/tests/test_camera/__init__.py new file mode 100644 index 00000000..32c11276 --- /dev/null +++ b/workflows/telesurgery/tests/test_camera/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
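The run.sh script above discovers tests with unittest after extending PYTHONPATH. An equivalent programmatic invocation, as a sketch that assumes PYTHONPATH already includes the scripts and tests directories the way run.sh arranges:

```python
import unittest

# Discover and run every test under the tests directory, mirroring run.sh.
suite = unittest.defaultTestLoader.discover(start_dir="workflows/telesurgery/tests")
unittest.TextTestRunner(verbosity=2).run(suite)
```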
diff --git a/workflows/telesurgery/tests/test_camera/__init__.py b/workflows/telesurgery/tests/test_camera/__init__.py
new file mode 100644
index 00000000..32c11276
--- /dev/null
+++ b/workflows/telesurgery/tests/test_camera/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/workflows/telesurgery/tests/test_camera/test_sim_camera_source.py b/workflows/telesurgery/tests/test_camera/test_sim_camera_source.py
new file mode 100644
index 00000000..f12c01a9
--- /dev/null
+++ b/workflows/telesurgery/tests/test_camera/test_sim_camera_source.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import concurrent.futures
+import gc
+import time
+import unittest
+
+import numpy as np
+from holohub.operators.camera.sim import IsaacSimCameraSourceOp
+from holoscan.conditions import CountCondition
+from holoscan.core import Application, Operator, OperatorSpec
+
+
+class AssertionOp(Operator):
+    def __init__(self, fragment, assertion_callback, *args, **kwargs):
+        self.assertion_callback = assertion_callback
+        super().__init__(fragment, *args, **kwargs)
+
+    def setup(self, spec: OperatorSpec):
+        spec.input("stream")
+
+    def compute(self, op_input, op_output, context):
+        stream = op_input.receive("stream")
+        self.assertion_callback(stream)
+
+
+class TestSimCameraSourceApplication(Application):
+    def __init__(self, width, height, assertion_callback):
+        self.width = width
+        self.height = height
+        self.assertion_callback = assertion_callback
+        self.sim_camera_source_op = None
+        super().__init__()
+
+    def compose(self):
+        print("compose")
+        self.sim_camera_source_op = IsaacSimCameraSourceOp(
+            self, name="sim_camera_source_op", width=self.width, height=self.height, count=CountCondition(self, 1)
+        )
+        assertion_op = AssertionOp(
+            self, name="assertion_op", assertion_callback=self.assertion_callback, count=CountCondition(self, 1)
+        )
+
+        self.add_flow(self.sim_camera_source_op, assertion_op, {("output", "stream")})
+
+    def push_data(self, data):
+        self.sim_camera_source_op.on_new_frame_rcvd(data, 0)
+
+
+class TestSimCameraSource(unittest.TestCase):
+    def setUp(self):
+        self.assertion_complete = False
+        self.app = TestSimCameraSourceApplication(width=100, height=100, assertion_callback=self.assertion_callback)
+        self.future = self.app.run_async()
+        while self.app.sim_camera_source_op is None:
+            print("Waiting for sim_camera_source_op to be initialized")
+            time.sleep(0.1)
+
+    def tearDown(self):
+        # Wait for the application to complete naturally
+        try:
+            # Since both operators have CountCondition(1), the app should finish quickly
+            self.future.result(timeout=5)  # Wait up to 5 seconds for completion
+        except concurrent.futures.TimeoutError:
+            self.future.cancel()
+            time.sleep(0.1)  # Give it a moment to clean up
+        except Exception as e:
+            print(f"Exception during tearDown: {e}")
+
+        gc.collect()
+
+    def test_sim_camera_source(self):
+        self.width = 100
+        self.height = 100
+        self.expected_data = np.random.randint(0, 255, (self.height, self.width, 4), dtype=np.uint8)
+        self.app.push_data(self.expected_data)
+        while not self.assertion_complete:
+            time.sleep(0.1)
+
+    def assertion_callback(self, stream):
+        assert np.array_equal(stream.data, self.expected_data)
+        assert stream.width == self.width
+        assert stream.height == self.height
diff --git a/workflows/telesurgery/tests/test_common/__init__.py b/workflows/telesurgery/tests/test_common/__init__.py
new file mode 100644
index 00000000..32c11276
--- /dev/null
+++ b/workflows/telesurgery/tests/test_common/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
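test_utils.py below relies on stubbing ntplib in sys.modules before the module under test is imported, so the suite runs even where the package is not installed. A standalone sketch of that pattern:

```python
import sys
from unittest.mock import MagicMock

# Register the stub before any import of the real package can happen.
sys.modules["ntplib"] = MagicMock()

import ntplib  # resolves to the MagicMock registered above

client = ntplib.NTPClient()  # arbitrary attribute access succeeds on a mock
print(type(client))  # <class 'unittest.mock.MagicMock'>
```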
diff --git a/workflows/telesurgery/tests/test_common/test_utils.py b/workflows/telesurgery/tests/test_common/test_utils.py
new file mode 100644
index 00000000..ac79bcaa
--- /dev/null
+++ b/workflows/telesurgery/tests/test_common/test_utils.py
@@ -0,0 +1,143 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+from unittest.mock import MagicMock, Mock, patch
+
+# Mock ntplib module before importing utils to handle missing dependency
+sys.modules["ntplib"] = MagicMock()
+
+
+class TestGetNtpOffset(unittest.TestCase):
+    """Test cases for get_ntp_offset function."""
+
+    @patch("common.utils.ntplib.NTPClient")
+    def test_get_ntp_offset_success(self, mock_ntp_client_class):
+        """Test successful NTP offset retrieval."""
+        from common.utils import get_ntp_offset
+
+        # Mock the NTP client and response
+        mock_client = Mock()
+        mock_response = Mock()
+        mock_response.offset = 0.123456
+        mock_client.request.return_value = mock_response
+        mock_ntp_client_class.return_value = mock_client
+
+        # Test with explicit parameters
+        offset = get_ntp_offset(ntp_server="pool.ntp.org", ntp_port=123, version=3)
+
+        self.assertEqual(offset, 0.123456)
+        mock_ntp_client_class.assert_called_once()
+        mock_client.request.assert_called_once_with("pool.ntp.org", port=123, version=3)
+
+    @patch("common.utils.ntplib.NTPClient")
+    def test_get_ntp_offset_exception(self, mock_ntp_client_class):
+        """Test NTP offset when request raises an exception."""
+        from common.utils import get_ntp_offset
+
+        # Mock the NTP client to raise an exception
+        mock_client = Mock()
+        mock_client.request.side_effect = Exception("Connection timeout")
+        mock_ntp_client_class.return_value = mock_client
+
+        # Test that exception is handled and returns 0
+        offset = get_ntp_offset(ntp_server="pool.ntp.org", ntp_port=123, version=3)
+
+        self.assertEqual(offset, 0)
+        mock_ntp_client_class.assert_called_once()
+        mock_client.request.assert_called_once_with("pool.ntp.org", port=123, version=3)
+
+    def test_get_ntp_offset_no_server(self):
+        """Test NTP offset when no server is provided."""
+        from common.utils import get_ntp_offset
+
+        # Test with None server
+        offset = get_ntp_offset(ntp_server=None)
+        self.assertEqual(offset, 0)
+
+        # Test with empty string server
+        offset = get_ntp_offset(ntp_server="")
+        self.assertEqual(offset, 0)
+
+    @patch.dict(os.environ, {"NTP_SERVER_HOST": "test.ntp.org", "NTP_SERVER_PORT": "1234"})
+    def test_get_ntp_offset_env_variables(self):
+        """Test NTP offset using environment variables for configuration."""
+        # Need to reload the module after setting environment variables
+        import importlib
+
+        import common.utils
+
+        importlib.reload(common.utils)
+        from common.utils import get_ntp_offset
+
+        # Mock the NTP client and response after reload
+        with patch("common.utils.ntplib.NTPClient") as mock_ntp_client_class:
+            mock_client = Mock()
+            mock_response = Mock()
+            mock_response.offset = -0.987654
+            mock_client.request.return_value = mock_response
+            mock_ntp_client_class.return_value = mock_client
+
+            # Test with default parameters (should use env vars)
+            offset = get_ntp_offset()
+
+            self.assertEqual(offset, -0.987654)
+            mock_client.request.assert_called_once_with("test.ntp.org", port=1234, version=3)
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_get_ntp_offset_no_env_variables(self):
+        """Test NTP offset when no environment variables are set."""
+        # Need to reload the module after clearing environment variables
+        import importlib
+
+        import common.utils
+
+        importlib.reload(common.utils)
+        from common.utils import get_ntp_offset
+
+        # Test with no environment variables set
+        offset = get_ntp_offset()
+        self.assertEqual(offset, 0)
+
+    @patch.dict(os.environ, {"NTP_SERVER_HOST": "test.ntp.org"}, clear=True)
+    def test_get_ntp_offset_default_port(self):
+        """Test NTP offset with default port when NTP_SERVER_PORT is not set."""
+        # Need to reload the module after setting environment variables
+        import importlib
+
+        import common.utils
+
+        importlib.reload(common.utils)
+        from common.utils import get_ntp_offset
+
+        # Mock the NTP client and response after reload
+        with patch("common.utils.ntplib.NTPClient") as mock_ntp_client_class:
+            mock_client = Mock()
+            mock_response = Mock()
+            mock_response.offset = 0.555555
+            mock_client.request.return_value = mock_response
+            mock_ntp_client_class.return_value = mock_client
+
+            # Test with default port (should be 123)
+            offset = get_ntp_offset()
+
+            self.assertEqual(offset, 0.555555)
+            mock_client.request.assert_called_once_with("test.ntp.org", port=123, version=3)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/workflows/telesurgery/tests/test_dataflow_integration.py b/workflows/telesurgery/tests/test_dataflow_integration.py
index fdebfcee..51861aff 100644
--- a/workflows/telesurgery/tests/test_dataflow_integration.py
+++ b/workflows/telesurgery/tests/test_dataflow_integration.py
@@ -53,7 +53,7 @@ def setUp(self):
 
     def start_process(self, cmd, log_file=None, env=None):
         if log_file:
-            stdout = open(log_file, "w")
+            stdout = open(log_file, "w", encoding="utf-8")
             self.log_files.append(stdout)
         else:
             stdout = subprocess.PIPE
@@ -103,7 +103,7 @@ def test_data_loop_with_gamepad(self):
         time.sleep(20)
 
         # Check surgeon process log for evidence of received frames
-        with open(self.surgeon_camera_log, "r") as f:
+        with open(self.surgeon_camera_log, "r", encoding="utf-8") as f:
             camera_log = f.read()
         self.assertIn("fps:", camera_log, "Camera data not received on surgeon side")
 
@@ -127,7 +127,7 @@ def test_data_loop_with_gamepad(self):
         time.sleep(10)
 
         # Check patient log for evidence of robot control command received
-        with open(self.patient_log, "r") as f:
+        with open(self.patient_log, "r", encoding="utf-8") as f:
             patient_log = f.read()
         self.assertTrue(
             "Update (" in patient_log or "set_mira" in patient_log, "Robot control command not received on patient side"
diff --git a/workflows/telesurgery/tests/test_dds/__init__.py b/workflows/telesurgery/tests/test_dds/__init__.py
new file mode 100644
index 00000000..32c11276
--- /dev/null
+++ b/workflows/telesurgery/tests/test_dds/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/workflows/telesurgery/tests/test_dds/common.py b/workflows/telesurgery/tests/test_dds/common.py
new file mode 100644
index 00000000..250adeef
--- /dev/null
+++ b/workflows/telesurgery/tests/test_dds/common.py
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import rti.idl as idl
+
+
+@idl.struct
+class TestData:
+    data: int = 0
diff --git a/workflows/telesurgery/tests/test_dds/test_publisher.py b/workflows/telesurgery/tests/test_dds/test_publisher.py
new file mode 100644
index 00000000..fc6ff8bf
--- /dev/null
+++ b/workflows/telesurgery/tests/test_dds/test_publisher.py
@@ -0,0 +1,98 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import queue
+import random
+import time
+import unittest
+from unittest.mock import MagicMock, patch
+
+# Import the DDS publisher operator under test
+from holohub.operators.dds.publisher import DDSPublisherOp
+from holoscan.conditions import CountCondition
+from holoscan.core import Application, Operator, OperatorSpec
+
+from .common import TestData
+
+
+class TestDataProviderOp(Operator):
+    def __init__(self, fragment, *args, **kwargs):
+        super().__init__(fragment, *args, **kwargs)
+        self.queue = queue.Queue()
+
+    def setup(self, spec: OperatorSpec):
+        spec.output("output")
+
+    def compute(self, op_input, op_output, context):
+        data = self.queue.get()
+        op_output.emit(data, "output")
+
+    def enqueue(self, data: TestData):
+        self.queue.put(data)
+
+
+class TestDDSPublisherApplication(Application):
+    def __init__(self, dds_domain_id, dds_topic):
+        self.dds_domain_id = dds_domain_id
+        self.dds_topic = dds_topic
+        self.dds_topic_class = TestData
+        self.data_provider_op = None
+        super().__init__()
+
+    def compose(self):
+        self.data_provider_op = TestDataProviderOp(self, name="data_provider_op", count=CountCondition(self, 1))
+        publisher_op = DDSPublisherOp(
+            self,
+            name="publisher_op",
+            dds_domain_id=self.dds_domain_id,
+            dds_topic=self.dds_topic,
+            dds_topic_class=self.dds_topic_class,
+            count=CountCondition(self, 1),
+        )
+
+        self.add_flow(self.data_provider_op, publisher_op)
+
+    def enqueue(self, data: TestData):
+        self.data_provider_op.enqueue(data)
+
+
+class TestDDSPublisher(unittest.TestCase):
+    def test_dds_publisher(self):
+        # mock the dds writer
+        with (
+            patch("rti.connextdds.DomainParticipant") as _,
+            patch("rti.connextdds.DataWriter") as mock_dds_writer,
+            patch("rti.connextdds.Topic") as mock_topic,
+        ):
+            mock_writer_instance = MagicMock()
+            mock_writer_instance.write = MagicMock()
+            mock_dds_writer.return_value = mock_writer_instance
+            mock_topic.return_value = MagicMock()
+
+            app = TestDDSPublisherApplication(
+                dds_domain_id=1,
+                dds_topic="test_topic",
+            )
+            future = app.run_async()
+
+            while app.data_provider_op is None:
+                print("Waiting for data_provider_op to be initialized")
+                time.sleep(0.1)
+
+            data = TestData(data=random.randint(0, 100))
+            app.enqueue(data)
+            future.result(timeout=5)
+
+            mock_writer_instance.write.assert_called_once_with(data)
diff --git a/workflows/telesurgery/tests/test_dds/test_subscriber.py b/workflows/telesurgery/tests/test_dds/test_subscriber.py
new file mode 100644
index 00000000..2a78ef6a
--- /dev/null
+++ b/workflows/telesurgery/tests/test_dds/test_subscriber.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+import time
+import unittest
+from unittest.mock import MagicMock, patch
+
+# Import the DDS subscriber operator under test
+from holohub.operators.dds.subscriber import DDSSubscriberOp
+from holoscan.conditions import CountCondition
+from holoscan.core import Application, Operator, OperatorSpec
+
+from .common import TestData
+
+
+class AssertionOp(Operator):
+    def __init__(self, fragment, assertion_callback, *args, **kwargs):
+        self.assertion_callback = assertion_callback
+        super().__init__(fragment, *args, **kwargs)
+
+    def setup(self, spec: OperatorSpec):
+        spec.input("input")
+
+    def compute(self, op_input, op_output, context):
+        data = op_input.receive("input")
+        self.assertion_callback(data)
+
+
+class TestDDSSubscriberApplication(Application):
+    def __init__(self, dds_domain_id, dds_topic, assertion_callback):
+        self.dds_domain_id = dds_domain_id
+        self.dds_topic = dds_topic
+        self.dds_topic_class = TestData
+        self.assertion_op = None
+        self.assertion_callback = assertion_callback
+        super().__init__()
+
+    def compose(self):
+        subscriber_op = DDSSubscriberOp(
+            self,
+            name="subscriber_op",
+            dds_domain_id=self.dds_domain_id,
+            dds_topic=self.dds_topic,
+            dds_topic_class=self.dds_topic_class,
+            count=CountCondition(self, 1),
+        )
+        self.assertion_op = AssertionOp(self, name="assertion_op", assertion_callback=self.assertion_callback)
+
+        self.add_flow(subscriber_op, self.assertion_op)
+
+
+class TestDDSSubscriber(unittest.TestCase):
+    def test_dds_subscriber(self):
+        self.test_data = TestData(data=random.randint(0, 100))
+        # mock the dds reader - patch the actual rti.connextdds module
+        with (
+            patch("rti.connextdds.DomainParticipant") as mock_domain_participant,
+            patch("rti.connextdds.DataReader") as mock_dds_reader,
+            patch("rti.connextdds.Topic") as mock_topic,
+        ):
+            mock_reader_instance = MagicMock()
+            mock_reader_instance.take_data = MagicMock()
+            mock_reader_instance.take_data.return_value = [self.test_data]
+            mock_dds_reader.return_value = mock_reader_instance
+            mock_topic.return_value = MagicMock()
+            mock_domain_participant.return_value = MagicMock()
+
+            app = TestDDSSubscriberApplication(
+                dds_domain_id=1,
+                dds_topic="test_topic",
+                assertion_callback=self.assertion_callback,
+            )
+            future = app.run_async()
+
+            while app.assertion_op is None:
+                print("Waiting for assertion_op to be initialized")
+                time.sleep(0.1)
+
+            future.result(timeout=5)
+
+            mock_reader_instance.take_data.assert_called_once()
+
+    def assertion_callback(self, data):
+        self.assertEqual(data, self.test_data)
diff --git a/workflows/telesurgery/tests/test_nvjpeg/__init__.py b/workflows/telesurgery/tests/test_nvjpeg/__init__.py
new file mode 100644
index 00000000..32c11276
--- /dev/null
+++ b/workflows/telesurgery/tests/test_nvjpeg/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
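One caveat for the round-trip test that follows: JPEG at quality 90 is lossy, so exact-equality checks between raw input and decoded output can be brittle (whether the decoder's "camera" port replays the untouched source stream is not visible in this diff). If an approximate comparison is ever needed, a hypothetical tolerance-based helper might look like:

```python
import numpy as np


def images_close(a, b, max_mean_abs_err=3.0):
    """Mean-absolute-pixel-error comparison for lossy codecs (hypothetical helper)."""
    diff = np.abs(a.astype(np.int16) - b.astype(np.int16))
    return float(diff.mean()) <= max_mean_abs_err
```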
diff --git a/workflows/telesurgery/tests/test_nvjpeg/test_nvjpeg_operators.py b/workflows/telesurgery/tests/test_nvjpeg/test_nvjpeg_operators.py
new file mode 100644
index 00000000..a4e4eeba
--- /dev/null
+++ b/workflows/telesurgery/tests/test_nvjpeg/test_nvjpeg_operators.py
@@ -0,0 +1,87 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from holohub.operators.nvjpeg.decoder import NVJpegDecoderOp
+from holohub.operators.nvjpeg.encoder import NVJpegEncoderOp
+from holoscan.core import Application, Operator, OperatorSpec
+from schemas.camera_stream import CameraStream
+
+
+class RandomPixelOp(Operator):
+    def __init__(self, fragment, width, height, *args, **kwargs):
+        self.width = width
+        self.height = height
+        super().__init__(fragment, *args, **kwargs)
+
+    def setup(self, spec: OperatorSpec):
+        spec.output("output")
+
+    def compute(self, op_input, op_output, context):
+        # randomly generate a 3 channel image of size width x height
+        image = np.random.randint(0, 255, (self.height, self.width, 3), dtype=np.uint8)
+        bgr_image = image[..., ::-1].copy()
+        camera_stream = CameraStream(data=bgr_image, width=self.width, height=self.height)
+        op_output.emit(camera_stream, "output")
+
+
+class AssertionOp(Operator):
+    def __init__(self, fragment, assertion_callback, *args, **kwargs):
+        self.assertion_callback = assertion_callback
+        super().__init__(fragment, *args, **kwargs)
+
+    def setup(self, spec: OperatorSpec):
+        spec.input("camera")
+        spec.input("stream")
+        spec.input("expected")
+
+    def compute(self, op_input, op_output, context):
+        stream = op_input.receive("stream")
+        expected = op_input.receive("expected")
+        self.assertion_callback(stream, expected)
+
+
+class TestNVJPEGOpsApplication(Application):
+    def __init__(self, width, height, quality, assertion_callback):
+        self.width = width
+        self.height = height
+        self.quality = quality
+        self.assertion_callback = assertion_callback
+        super().__init__()
+
+    def compose(self):
+        random_pixel_op = RandomPixelOp(self, name="random_pixel_op", width=self.width, height=self.height)
+        nvjpeg_encoder_op = NVJpegEncoderOp(self, name="nvjpeg_encoder_op", skip=False, quality=self.quality)
+        nvjpeg_decoder_op = NVJpegDecoderOp(self, name="nvjpeg_decoder_op", skip=False)
+        assertion_op = AssertionOp(self, name="assertion_op", assertion_callback=self.assertion_callback)
+
+        self.add_flow(random_pixel_op, nvjpeg_encoder_op, {("output", "input")})
+        self.add_flow(nvjpeg_encoder_op, nvjpeg_decoder_op, {("output", "input")})
+        self.add_flow(nvjpeg_decoder_op, assertion_op, {("camera", "camera"), ("output", "stream")})
+        self.add_flow(random_pixel_op, assertion_op, {("output", "expected")})
+
+
+class TestNVJPEGOperators(unittest.TestCase):
+    def test_nvjpeg_operators(self):
+        app = TestNVJPEGOpsApplication(width=100, height=100, quality=90, assertion_callback=self.assertion_callback)
+        app.run()
+
+    def assertion_callback(self, stream, expected):
+        # compare two numpy arrays and ensure they are equal
+        assert np.array_equal(stream.data, expected.data)
+        assert stream.width == expected.width
+        assert stream.height == expected.height
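For reference, the parameter-resolution order that camera.py and simulation/main.py now share (per-encoder default file, then an explicit file path, then inline JSON) can be summarized in a standalone sketch; the default file names mirror the patch, everything else is illustrative:

```python
import json
import os

# Default parameter files shipped next to the scripts (names from the patch).
DEFAULTS = {"nvjpeg": "nvjpeg_encoder_params.json", "nvc": "nvc_encoder_params.json"}


def resolve_encoder_params(encoder, encoder_params, base_dir):
    """Resolve --encoder_params into a dict: default file, file path, or inline JSON."""
    if encoder_params is None:
        default = DEFAULTS.get(encoder)
        if default is None:  # e.g. --encoder none
            return {}
        encoder_params = os.path.join(base_dir, default)
    if isinstance(encoder_params, str) and os.path.isfile(encoder_params):
        with open(encoder_params, encoding="utf-8") as f:
            return json.load(f)
    return json.loads(encoder_params) if encoder_params else {}


# Inline JSON takes effect when the string is not a path to an existing file.
print(resolve_encoder_params("nvjpeg", '{"quality": 75}', "."))  # {'quality': 75}
```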