Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
341 changes: 194 additions & 147 deletions doc/SAI-Proposal-HW-FRR.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,150 +11,197 @@

## 1.0 Introduction

SAI supports SW based FRR where the decision to switch over to the secondary path is triggered by the SW.

Following is the current SAI workflow for SW based FRR.
- Create a protection NHG
nhg_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_entry_attrs[0].value.u32 = SAI_NEXT_HOP_GROUP_TYPE_PROTECTION;

- Create primary and secondary members (Note members can be NHG as well)
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_STANDBY;

- Based on the monitoring object, SW sets the following boolean to trigger switchover
nhg_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_ATTR_SET_SWITCHOVER;
nhg_entry_attrs[1].value.booldata = true;
saistatus = sai_set_next_hop_group_attribute_fn(nhg_id, &nhg_entry_attrs[1]);

## 2.0 HW Based Trigger
The main change in this proposal is the trigger: SW is no longer responsible for monitoring an object and triggering the switchover.
HW monitors the configured object and triggers the switch to the secondary path based on the state of the monitored object.
For example, if a port is being monitored and the port goes down, all the NHs resolving via this port are switched over to the secondary path.

## 3.0 SAI Enhancements
Hardware needs to know ahead of time which NHG/NH are part of the secondary group so as to mark them as backup from the configured primary group. For this reason a hint is needed to identify such NHG/NH.

This hint is provided using a new NHG type
```c
/** Next hop hardware protection group. This is the group backing up the primary in the protection group type and is managed by hardware */
SAI_NEXT_HOP_GROUP_TYPE_HW_PROTECTION,
```

Additionally port counters are introduced to capture
- How many times port has participated in the failover
- Drops observed during failover

```c
/** SAI port stat if HW protection switchover events */
SAI_PORT_STAT_IF_IN_HW_PROTECTION_SWITCHOVER_EVENTS,

/** SAI port stat if HW protection switchover related packet drops */
SAI_PORT_STAT_IF_IN_HW_PROTECTION_SWITCHOVER_DROP_PKTS,
```

## 4.0 Example Workflow


### Topology Example
There are two uplinks from a switch and both are part of the primary and secondary group.
In this case we will:
- Create an NHG nhg1 of type PROTECTION and configure NH1/port1 and NH2/port2 as primary members
- Create a NHG nhg2 of type HW_PROTECTION with members as NH1/port1 and NH2/port2
- Set NHG nhg2 as a secondary member of NHG nhg1

PROTECTION[nhg1] --> PRIMARY[NH1, NH2], SECONDARY[nhg2]
HW_PROTECTION[nhg2] --> [NH1, NH2]




```c
// Router interface ids used by the two uplink next hops.
nh_1_interface_id = 1;
nh_2_interface_id = 2;
switch_id = 0;

// nhg1: PROTECTION group holding the primary members and the backup group.
nhg_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_entry_attrs[0].value.u32 = SAI_NEXT_HOP_GROUP_TYPE_PROTECTION;
saistatus = sai_frr_api->create_next_hop_group(&nhg1, switch_id, 1, nhg_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// nhg2: HW_PROTECTION group that backs up the primary members of nhg1.
nhg_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_entry_attrs[0].value.u32 = SAI_NEXT_HOP_GROUP_TYPE_HW_PROTECTION;
saistatus = sai_frr_api->create_next_hop_group(&nhg2, switch_id, 1, nhg_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// NH1: three attributes are populated, so the attribute count passed is 3.
nh_entry_attrs[0].id = SAI_NEXT_HOP_ATTR_TYPE;
nh_entry_attrs[0].value.u32 = SAI_NEXT_HOP_TYPE_IP;
nh_entry_attrs[1].id = SAI_NEXT_HOP_ATTR_IP;
CONVERT_STRING_TO_SAI_IPV4(nh_entry_attrs[1].value, "10.1.1.1");
nh_entry_attrs[2].id = SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID;
nh_entry_attrs[2].value.u64 = nh_1_interface_id;
saistatus = sai_frr_api->create_next_hop(&nh_1_id, switch_id, 3, nh_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// NH2.
nh_entry_attrs[0].id = SAI_NEXT_HOP_ATTR_TYPE;
nh_entry_attrs[0].value.u32 = SAI_NEXT_HOP_TYPE_IP;
nh_entry_attrs[1].id = SAI_NEXT_HOP_ATTR_IP;
CONVERT_STRING_TO_SAI_IPV4(nh_entry_attrs[1].value, "10.1.2.1");
nh_entry_attrs[2].id = SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID;
nh_entry_attrs[2].value.u64 = nh_2_interface_id;
saistatus = sai_frr_api->create_next_hop(&nh_2_id, switch_id, 3, nh_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// Program the primary NH Group members of nhg1 (NH1 and NH2).
// Each member gets its own handle so that neither OID is overwritten.
nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg1;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_1_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_1_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg1;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_2_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_2_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// Program the secondary NH Group member of nhg1: nhg2 in the STANDBY role.
nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg1;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nhg2;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_STANDBY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_3_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// Populate the HW_PROTECTION group nhg2 with NH1 and NH2, as listed in the
// topology: HW_PROTECTION[nhg2] --> [NH1, NH2].
nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg2;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_1_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_4_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg2;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_2_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_5_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}
```
SAI supports SW based FRR where the decision to switch over to the secondary path is triggered by the SW.

Following is the current SAI workflow for SW based FRR.
- Create a protection NHG
nhg_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_entry_attrs[0].value.u32 = SAI_NEXT_HOP_GROUP_TYPE_PROTECTION;

- Create primary and secondary members (Note members can be NHG as well)
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_STANDBY;

- Based on the monitoring object, SW sets the following boolean to trigger switchover
nhg_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_ATTR_SET_SWITCHOVER;
nhg_entry_attrs[1].value.booldata = true;
saistatus = sai_set_next_hop_group_attribute_fn(nhg_id, &nhg_entry_attrs[1]);

## 2.0 HW Based Trigger
The main change in this proposal is the trigger: SW is no longer responsible for monitoring an object and triggering the switchover.
HW monitors the configured object and triggers the switch to the secondary path based on the state of the monitored object.
For example, if a port is being monitored and the port goes down, all the NHs resolving via this port are switched over to the secondary path.

## 3.0 SAI Enhancements
Hardware needs to know ahead of time which NHG/NH are part of the secondary group so as to mark them as backup from the configured primary group. For this reason a hint is needed to identify such NHG/NH.

This hint is provided using a new NHG type
```c
/** Next hop hardware protection group. This is the group backing up the primary in the protection group type and is managed by hardware */
SAI_NEXT_HOP_GROUP_TYPE_HW_PROTECTION,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does SAI Knows which is primary across multiple NHGS created in the system. actually existing model already supports this i think. the only missing is H/w protection with callbacks when there is a state change.

Below is my understanding based on current model

  1. Protection between Next-Hop Groups (H-ECMP case)

NHG-1: Primary group (Endpoint-1)
NHG-2: Backup group (Endpoint-2)
NHG-3: Parent NHG with:
Member-1: Primary → NHG-1
Member-2: Backup → NHG-2

This models protection at the group level, which is particularly useful for hierarchical ECMP (H-ECMP) scenarios, where failover happens across endpoint groups.
Also, this use-case is already supported in the current model.

Example Flow:

Create NHG-1 with Tunnel endpoint-1
Create NHG-2 with Tunnel endpoint-2
Create NHG-3 and add:
NHG-1 as primary
NHG-2 as secondary (backup)

  1. Protection within a Next-Hop Group (Member-Level Protection)

In the current SAI model, this can be represented using smaller protected NHGs and composing them into a parent group:

Create NHG-2 with:
Primary → NH1
Backup → NH2
Create NHG-3 with:
Primary → NH2
Backup → NH1
NH3 and NH4 do not have backups
Create NHG-1 and add members:
Member-1: NHG-2
Member-2: NHG-3
Member-3: NH3
Member-4: NH4

This effectively models per-member protection, where some next-hops have protection while others do not.

For the current model, NHG-level notifications on switchover from primary to backup should be sufficient, in my view for H/w based protection. As the protection semantics are already captured through the existing NHG hierarchy/composition

Copy link
Copy Markdown
Contributor Author

@chikkaiah-work chikkaiah-work Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sathk-marvell, You are right all these are already exists. I'm just adding notification callback. the way diffs are created is confusing. The new changes are in this file starts from line #162.
Please let me know if I have missed anything form your feedback.

```

Additionally port counters are introduced to capture
- How many times port has participated in the failover
- Drops observed during failover

```c
/** SAI port stat if HW protection switchover events */
SAI_PORT_STAT_IF_IN_HW_PROTECTION_SWITCHOVER_EVENTS,

/** SAI port stat if HW protection switchover related packet drops */
SAI_PORT_STAT_IF_IN_HW_PROTECTION_SWITCHOVER_DROP_PKTS,
```

## 4.0 Example Workflow


### Topology Example
There are two uplinks from a switch and both are part of the primary and secondary group.
In this case we will:
- Create an NHG nhg1 of type PROTECTION and configure NH1/port1 and NH2/port2 as primary members
- Create a NHG nhg2 of type HW_PROTECTION with members as NH1/port1 and NH2/port2
- Set NHG nhg2 as a secondary member of NHG nhg1

PROTECTION[nhg1] --> PRIMARY[NH1, NH2], SECONDARY[nhg2]
HW_PROTECTION[nhg2] --> [NH1, NH2]




```c
// Router interface ids used by the two uplink next hops.
nh_1_interface_id = 1;
nh_2_interface_id = 2;
switch_id = 0;

// nhg1: PROTECTION group holding the primary members and the backup group.
nhg_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_entry_attrs[0].value.u32 = SAI_NEXT_HOP_GROUP_TYPE_PROTECTION;
saistatus = sai_frr_api->create_next_hop_group(&nhg1, switch_id, 1, nhg_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// nhg2: HW_PROTECTION group that backs up the primary members of nhg1.
nhg_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_ATTR_TYPE;
nhg_entry_attrs[0].value.u32 = SAI_NEXT_HOP_GROUP_TYPE_HW_PROTECTION;
saistatus = sai_frr_api->create_next_hop_group(&nhg2, switch_id, 1, nhg_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// NH1: three attributes are populated, so the attribute count passed is 3.
nh_entry_attrs[0].id = SAI_NEXT_HOP_ATTR_TYPE;
nh_entry_attrs[0].value.u32 = SAI_NEXT_HOP_TYPE_IP;
nh_entry_attrs[1].id = SAI_NEXT_HOP_ATTR_IP;
CONVERT_STRING_TO_SAI_IPV4(nh_entry_attrs[1].value, "10.1.1.1");
nh_entry_attrs[2].id = SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID;
nh_entry_attrs[2].value.u64 = nh_1_interface_id;
saistatus = sai_frr_api->create_next_hop(&nh_1_id, switch_id, 3, nh_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// NH2.
nh_entry_attrs[0].id = SAI_NEXT_HOP_ATTR_TYPE;
nh_entry_attrs[0].value.u32 = SAI_NEXT_HOP_TYPE_IP;
nh_entry_attrs[1].id = SAI_NEXT_HOP_ATTR_IP;
CONVERT_STRING_TO_SAI_IPV4(nh_entry_attrs[1].value, "10.1.2.1");
nh_entry_attrs[2].id = SAI_NEXT_HOP_ATTR_ROUTER_INTERFACE_ID;
nh_entry_attrs[2].value.u64 = nh_2_interface_id;
saistatus = sai_frr_api->create_next_hop(&nh_2_id, switch_id, 3, nh_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// Program the primary NH Group members of nhg1 (NH1 and NH2).
// Each member gets its own handle so that neither OID is overwritten.
nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg1;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_1_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_1_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg1;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_2_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_2_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// Program the secondary NH Group member of nhg1: nhg2 in the STANDBY role.
nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg1;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nhg2;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_STANDBY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_3_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

// Populate the HW_PROTECTION group nhg2 with NH1 and NH2, as listed in the
// topology: HW_PROTECTION[nhg2] --> [NH1, NH2].
nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg2;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_1_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_4_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}

nhgm_entry_attrs[0].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
nhgm_entry_attrs[0].value.oid = nhg2;
nhgm_entry_attrs[1].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
nhgm_entry_attrs[1].value.oid = nh_2_id;
nhgm_entry_attrs[2].id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_CONFIGURED_ROLE;
nhgm_entry_attrs[2].value.u32 = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_PRIMARY;
saistatus = sai_frr_api->create_next_hop_group_member(&nhgm_5_id, switch_id, 3, nhgm_entry_attrs);
if (saistatus != SAI_STATUS_SUCCESS) {
    return saistatus;
}
```

## 5.0 Per-Monitored-Object Switchover Notification

To provide NOS visibility for HW-triggered FRR, add a switch-level notification callback.
Notification is generated per monitored object, and one monitored object may point to multiple protection groups.

### Notification

```c
typedef struct _sai_next_hop_group_hw_protection_switchover_notification_data_t
{
sai_object_id_t monitored_oid; // monitored object id
sai_next_hop_group_member_observed_role_t new_role // Current role after the switchover
uint32_t switchover_success_count; // number of protection groups switched successfully
sai_object_list_t failed_next_hop_groups; // failed protection-group object ids
} sai_next_hop_group_hw_protection_switchover_notification_data_t;

/**
 * @brief HW protection switchover notification callback.
 *
 * @param[in] count Presumably the number of entries in the data array - TODO confirm
 * @param[in] data Pointer to the switchover notification data
 */
typedef void (*sai_next_hop_group_hw_protection_switchover_notification_fn)(
_In_ uint32_t count,
_In_ const sai_next_hop_group_hw_protection_switchover_notification_data_t *data);
```

### Switch attribute for callback registration

```c
SAI_SWITCH_ATTR_NEXT_HOP_GROUP_HW_PROTECTION_SWITCHOVER_NOTIFY
```

### Usage example

```c
/* Register nhg_hw_protection_switchover_cb through the switch-level notify attribute. */
switch_attr.id = SAI_SWITCH_ATTR_NEXT_HOP_GROUP_HW_PROTECTION_SWITCHOVER_NOTIFY;
switch_attr.value.ptr = (void*)nhg_hw_protection_switchover_cb;
/* NOTE(review): the status returned by set_switch_attribute is not checked in this example. */
sai_switch_api->set_switch_attribute(switch_id, &switch_attr);
```

Example callback report:
```c
monitored_oid = <monitored_object_oid>
new_role = SAI_NEXT_HOP_GROUP_MEMBER_CONFIGURED_ROLE_STANDBY
switchover_success_count = 5
failed_next_hop_groups.count = 2
failed_next_hop_groups.list[0] = failed_nhg_oid1
failed_next_hop_groups.list[1] = failed_nhg_oid2
```
In this example, `switchover_success_count = 5` means five protection groups switched over successfully
for the monitored object, while `failed_next_hop_groups.count = 2` means two protection groups failed.
Loading
Loading