Skip to main content
🤝

이 가이드는 인류와 AI가 함께 만드는 지식입니다.

이 콘텐츠는 Human + AI Partnership 철학 아래 모든 사람이 로봇·AI를 배울 수 있도록 무료로 제공됩니다. 당신의 질문과 기여가 다음 학생의 미래를 바꿉니다.

ROS 2 Performance Optimization & Real-time Systems Guide 2026

Tune ROS 2 for sub-millisecond latency, deterministic behavior, and industrial-grade reliability. Master CPU affinity, memory management, QoS tuning, and real-time kernel configuration.

1. Real-time Kernel Setup

Install and configure Linux real-time kernel:

# Install PREEMPT_RT patch kernel (Ubuntu)
sudo apt install linux-image-rt-generic

# Verify RT kernel
uname -r | grep rt

# Check RT capabilities
sudo chrt -p $$  # Show current process scheduling

# Set real-time priority for ROS 2 nodes
sudo chrt -p 80 <pid>  # Priority 80 (80-99 = real-time)

2. CPU Pinning & Thread Affinity

Bind ROS 2 nodes to specific CPU cores:

// CPU affinity in C++ (ROS 2)
#include <rclcpp/rclcpp.hpp>
#include <pthread.h>
#include <sched.h>

class PinnedNode : public rclcpp::Node {
 public:
  PinnedNode() : Node("pinned_node") {
    // Pin to CPU core 0
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(0, &set);  // Core 0
    pthread_setaffinity_np(pthread_self(), sizeof(set), &set);

    RCLCPP_INFO(get_logger(), "Node pinned to CPU 0");

    // Timer with high priority
    timer_ = create_wall_timer(
      std::chrono::milliseconds(1),
      [this]() { on_timer(); });
  }

 private:
  void on_timer() {
    // Time-critical control loop
    auto now = rclcpp::Clock().now();
    RCLCPP_DEBUG(get_logger(), "Loop timing: %ld ns", now.nanoseconds());
  }

  rclcpp::TimerBase::SharedPtr timer_;
};

3. QoS Tuning for Latency

Optimize QoS profiles for real-time performance:

// Minimal-latency QoS configuration
rclcpp::QoS qos(1);  // History depth = 1 (latest only)
qos.reliability(rclcpp::ReliabilityPolicy::BestEffort);
qos.durability(rclcpp::DurabilityPolicy::Volatile);
qos.deadline(std::chrono::milliseconds(5));  // 5ms deadline
qos.lifespan(std::chrono::milliseconds(50)); // 50ms lifespan

auto publisher = node->create_publisher<geometry_msgs::msg::Twist>(
  "cmd_vel", qos);

// Subscriber with matching QoS
auto subscriber = node->create_subscription<sensor_msgs::msg::Imu>(
  "imu", qos,
  [](const sensor_msgs::msg::Imu::SharedPtr msg) {
    // Process IMU data with minimal buffering
  });

4. Intra-Process Communication (IPC)

Use zero-copy IPC for same-process communication:

// Enable intra-process communication
#include <rclcpp/experimental/intra_process_manager.hpp>

int main() {
  rclcpp::init(0, nullptr);

  // Create executor with IPC enabled
  rclcpp::executors::SingleThreadedExecutor executor;

  auto node = rclcpp::Node::make_shared("ipc_node");
  executor.add_node(node);

  // IPC context: zero-copy delivery within process
  rclcpp::experimental::ExecutorOptions options;
  options.context = rclcpp::contexts::default_context::get_global_default_context();

  executor.spin();
  rclcpp::shutdown();
  return 0;
}

5. Memory Optimization & Pre-allocation

Pre-allocate memory to prevent runtime allocation:

// Pre-allocate message buffers
#include <rclcpp/rclcpp.hpp>

class OptimizedPublisher : public rclcpp::Node {
 public:
  OptimizedPublisher() : Node("optimized_pub") {
    publisher_ = create_publisher<sensor_msgs::msg::Image>("image", 10);

    // Pre-allocate image buffer (1920x1080 RGBA)
    image_msg_.data.resize(1920 * 1080 * 4);
    image_msg_.width = 1920;
    image_msg_.height = 1080;

    timer_ = create_wall_timer(
      std::chrono::milliseconds(33),  // 30 FPS
      [this]() { publish_image(); });
  }

 private:
  void publish_image() {
    // Reuse pre-allocated buffer (no malloc)
    // Fill image data
    publisher_->publish(image_msg_);
  }

  rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr publisher_;
  sensor_msgs::msg::Image image_msg_;
  rclcpp::TimerBase::SharedPtr timer_;
};

6. Executor Configuration

Choose executor for optimal performance:

// Single-threaded executor (lowest latency, no lock contention)
rclcpp::executors::SingleThreadedExecutor executor;
executor.add_node(node);
executor.spin();

// Multi-threaded executor (for parallel tasks)
rclcpp::executors::MultiThreadedExecutor executor(
  rclcpp::ExecutorOptions().set_max_workers(4));

// Events executor (event-driven, minimal busy-waiting)
rclcpp::executors::EventsExecutor executor;

// StaticExecutor (pre-allocated, deterministic timing)
#include <rclcpp_components/node_main.hpp>
rclcpp::StaticExecutor static_exec;

7. Latency Profiling & Measurement

Profile ROS 2 system latency:

// Measure end-to-end latency
#include <chrono>

class LatencyMeasurer : public rclcpp::Node {
 public:
  LatencyMeasurer() : Node("latency_node") {
    sub_ = create_subscription<std_msgs::msg::Header>(
      "input", 10,
      [this](const std_msgs::msg::Header::SharedPtr msg) {
        auto now = rclcpp::Clock().now();
        auto latency_ns = now.nanoseconds() - msg->stamp.nanoseconds();
        RCLCPP_INFO(get_logger(), "Latency: %ld ns (%.3f ms)",
                    latency_ns, latency_ns / 1e6);
      });
  }

 private:
  rclcpp::Subscription<std_msgs::msg::Header>::SharedPtr sub_;
};

// Use ros2_tracing for detailed profiling
# ros2 trace /path/to/session
# ros2 trace plot

8. Networking Optimization

Tune network stack for low-latency communication:

# Disable Nagle's algorithm (reduces batching delay)
sudo sysctl -w net.ipv4.tcp_nodelay=1

# Increase socket buffer sizes
sudo sysctl -w net.core.rmem_max=134217728
sudo sysctl -w net.core.wmem_max=134217728

# Use dedicated network interface
export RMW_IMPLEMENTATION=rmw_cyclonedds_cpp
export CYCLONEDDS_URI=file:///etc/cyclonedds.xml

# Localhost-only (skip network stack)
export RMW_USE_LOCALHOST_ONLY=1

9. Real-time Deadline Monitoring

Detect missed deadlines:

// Deadline-aware control loop
class DeadlineMonitor : public rclcpp::Node {
 public:
  DeadlineMonitor() : Node("deadline_monitor") {
    timer_ = create_wall_timer(
      std::chrono::milliseconds(10),  // 10ms deadline
      [this]() { check_deadline(); });
  }

 private:
  void check_deadline() {
    auto start = std::chrono::steady_clock::now();

    // Perform control tasks
    do_control_tasks();

    auto elapsed = std::chrono::steady_clock::now() - start;
    if (elapsed > std::chrono::milliseconds(10)) {
      RCLCPP_WARN(get_logger(), "Deadline missed: %ld ms",
                  std::chrono::duration_cast<std::chrono::milliseconds>(
                    elapsed).count());
    }
  }

  rclcpp::TimerBase::SharedPtr timer_;
};

10. Benchmarking & Load Testing

Validate performance under load:

# ROS 2 performance benchmarking tools
ros2 run performance_test perf_test   --mode Publisher   --msg_type std_msgs/String   --publish_rate 10000  # 10k Hz

# Monitor system metrics
# CPU usage, memory, latency percentiles

# Real-time scheduling benchmark
chrt -p -m  # Show scheduling policies

# Latency histogram generation
# Analyze p50, p95, p99 latencies

Key Takeaways

Real-time ROS 2 requires kernel tuning, CPU affinity, careful QoS configuration, and intra-process communication. Achieve sub-millisecond latency by combining hardware isolation, memory pre-allocation, and single-threaded execution. Profile relentlessly and measure on target hardware.