Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ teamsyncd/teamsyncd
tests/tests
tests/mock_tests/tests_response_publisher
tests/mock_tests/tests_fpmsyncd
tests/mock_tests/tests_intfmgrd
tests/mock_tests/tests_portsyncd


# Test Files #
Expand All @@ -87,5 +89,7 @@ tests/mock_tests/tests.trs
tests/test-suite.log
tests/tests.log
tests/tests.trs
tests/mock_tests/**/*log
tests/mock_tests/**/*trs
orchagent/p4orch/tests/**/*gcda
orchagent/p4orch/tests/**/*gcno
11 changes: 11 additions & 0 deletions orchagent/aclorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,11 @@ bool AclRule::createRule()
status = sai_acl_api->create_acl_entry(&m_ruleOid, gSwitchId, (uint32_t)rule_attrs.size(), rule_attrs.data());
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_ALREADY_EXISTS)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@theasianpianist, is this change required for mux idempotency/rollback? @bingwang-ms , can you please review?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the ACL entry already exists, the SAI API will return this status, when we rollback we might hit this scenario so want to continue normally if the entry does already exist.

{
SWSS_LOG_NOTICE("ACL rule %s already exists", m_id.c_str());
return true;
}
SWSS_LOG_ERROR("Failed to create ACL rule %s, rv:%d",
m_id.c_str(), status);
AclRange::remove(range_objects, range_object_list.count);
Expand Down Expand Up @@ -1201,6 +1206,12 @@ bool AclRule::removeRule()
auto status = sai_acl_api->remove_acl_entry(m_ruleOid);
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_NOT_FOUND)
{
SWSS_LOG_NOTICE("ACL rule already deleted");
m_ruleOid = SAI_NULL_OBJECT_ID;
return true;
}
SWSS_LOG_ERROR("Failed to delete ACL rule, status %s", sai_serialize_status(status).c_str());
return false;
}
Expand Down
118 changes: 83 additions & 35 deletions orchagent/muxorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ static sai_status_t create_route(IpPrefix &pfx, sai_object_id_t nh)
sai_status_t status = sai_route_api->create_route_entry(&route_entry, (uint32_t)attrs.size(), attrs.data());
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_ALREADY_EXISTS) {
SWSS_LOG_NOTICE("Tunnel route to %s already exists", pfx.to_string().c_str());
return SAI_STATUS_SUCCESS;
}
SWSS_LOG_ERROR("Failed to create tunnel route %s,nh %" PRIx64 " rv:%d",
pfx.getIp().to_string().c_str(), nh, status);
return status;
Expand Down Expand Up @@ -145,6 +149,10 @@ static sai_status_t remove_route(IpPrefix &pfx)
sai_status_t status = sai_route_api->remove_route_entry(&route_entry);
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_NOT_FOUND) {
SWSS_LOG_NOTICE("Tunnel route to %s already removed", pfx.to_string().c_str());
return SAI_STATUS_SUCCESS;
}
SWSS_LOG_ERROR("Failed to remove tunnel route %s, rv:%d",
pfx.getIp().to_string().c_str(), status);
return status;
Expand Down Expand Up @@ -469,15 +477,15 @@ void MuxCable::setState(string new_state)

mux_cb_orch_->updateMuxMetricState(mux_name_, new_state, true);

MuxState state = state_;
prev_state_ = state_;
state_ = ns;

st_chg_in_progress_ = true;

if (!(this->*(state_machine_handlers_[it->second]))())
{
//Reset back to original state
state_ = state;
state_ = prev_state_;
st_chg_in_progress_ = false;
st_chg_failed_ = true;
throw std::runtime_error("Failed to handle state transition");
Expand All @@ -493,6 +501,51 @@ void MuxCable::setState(string new_state)
return;
}

void MuxCable::rollbackStateChange()
{
if (prev_state_ == MuxState::MUX_STATE_FAILED || prev_state_ == MuxState::MUX_STATE_PENDING)
{
SWSS_LOG_ERROR("[%s] Rollback to %s not supported", mux_name_.c_str(),
muxStateValToString.at(prev_state_).c_str());
return;
}
SWSS_LOG_WARN("[%s] Rolling back state change to %s", mux_name_.c_str(),
muxStateValToString.at(prev_state_).c_str());
mux_cb_orch_->updateMuxMetricState(mux_name_, muxStateValToString.at(prev_state_), true);
st_chg_in_progress_ = true;
state_ = prev_state_;
bool success = false;
switch (prev_state_)
{
case MuxState::MUX_STATE_ACTIVE:
success = stateActive();
break;
case MuxState::MUX_STATE_INIT:
case MuxState::MUX_STATE_STANDBY:
success = stateStandby();
break;
case MuxState::MUX_STATE_FAILED:
case MuxState::MUX_STATE_PENDING:
// Check at the start of the function means we will never reach here
SWSS_LOG_ERROR("[%s] Rollback to %s not supported", mux_name_.c_str(),
muxStateValToString.at(prev_state_).c_str());
return;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will leave st_chg_in_progress_ as true, right?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we will never get here since the check on the first line of this function will return early for FAILED and PENDING states

}
st_chg_in_progress_ = false;
if (success)
{
st_chg_failed_ = false;
}
else
{
st_chg_failed_ = true;
SWSS_LOG_ERROR("[%s] Rollback to %s failed",
mux_name_.c_str(), muxStateValToString.at(prev_state_).c_str());
}
mux_cb_orch_->updateMuxMetricState(mux_name_, muxStateValToString.at(state_), false);
mux_cb_orch_->updateMuxState(mux_name_, muxStateValToString.at(state_));
}

string MuxCable::getState()
{
SWSS_LOG_INFO("Get state request for %s, state %s",
Expand Down Expand Up @@ -785,8 +838,6 @@ void MuxNbrHandler::updateTunnelRoute(NextHopKey nh, bool add)
}
}

std::map<std::string, AclTable> MuxAclHandler::acl_table_;

MuxAclHandler::MuxAclHandler(sai_object_id_t port, string alias)
{
SWSS_LOG_ENTER();
Expand All @@ -804,32 +855,21 @@ MuxAclHandler::MuxAclHandler(sai_object_id_t port, string alias)
port_ = port;
alias_ = alias;

auto found = acl_table_.find(table_name);
if (found == acl_table_.end())
{
SWSS_LOG_NOTICE("First time create for port %" PRIx64 "", port);
// Always try to create the table first. If it already exists, function will return early.
createMuxAclTable(port, table_name);

// First time handling of Mux Table, create ACL table, and bind
createMuxAclTable(port, table_name);
SWSS_LOG_NOTICE("Binding port %" PRIx64 "", port);

AclRule* rule = gAclOrch->getAclRule(table_name, rule_name);
if (rule == nullptr)
{
shared_ptr<AclRulePacket> newRule =
make_shared<AclRulePacket>(gAclOrch, rule_name, table_name, false /*no counters*/);
createMuxAclRule(newRule, table_name);
}
else
{
SWSS_LOG_NOTICE("Binding port %" PRIx64 "", port);

AclRule* rule = gAclOrch->getAclRule(table_name, rule_name);
if (rule == nullptr)
{
shared_ptr<AclRulePacket> newRule =
make_shared<AclRulePacket>(gAclOrch, rule_name, table_name, false /*no counters*/);
createMuxAclRule(newRule, table_name);
}
else
{
gAclOrch->updateAclRule(table_name, rule_name, MATCH_IN_PORTS, &port, RULE_OPER_ADD);
}
gAclOrch->updateAclRule(table_name, rule_name, MATCH_IN_PORTS, &port, RULE_OPER_ADD);
}
}

Expand Down Expand Up @@ -862,23 +902,16 @@ void MuxAclHandler::createMuxAclTable(sai_object_id_t port, string strTable)
{
SWSS_LOG_ENTER();

auto inserted = acl_table_.emplace(piecewise_construct,
std::forward_as_tuple(strTable),
std::forward_as_tuple(gAclOrch, strTable));

assert(inserted.second);

AclTable& acl_table = inserted.first->second;

sai_object_id_t table_oid = gAclOrch->getTableById(strTable);
if (table_oid != SAI_NULL_OBJECT_ID)
{
// DROP ACL table is already created
SWSS_LOG_NOTICE("ACL table %s exists, reuse the same", strTable.c_str());
acl_table = *(gAclOrch->getTableByOid(table_oid));
SWSS_LOG_INFO("ACL table %s exists, reuse the same", strTable.c_str());
return;
}

SWSS_LOG_NOTICE("First time create for port %" PRIx64 "", port);
AclTable acl_table(gAclOrch, strTable);
auto dropType = gAclOrch->getAclTableType(TABLE_TYPE_DROP);
assert(dropType);
acl_table.validateAddType(*dropType);
Expand Down Expand Up @@ -1582,10 +1615,25 @@ bool MuxCableOrch::addOperation(const Request& request)
{
mux_obj->setState(state);
}
catch(const std::runtime_error& error)
catch(const std::runtime_error& e)
{
SWSS_LOG_ERROR("Mux Error setting state %s for port %s. Error: %s",
state.c_str(), port_name.c_str(), error.what());
state.c_str(), port_name.c_str(), e.what());
mux_obj->rollbackStateChange();
return true;
}
catch (const std::logic_error& e)
{
SWSS_LOG_ERROR("Logic error while setting state %s for port %s. Error: %s",
state.c_str(), port_name.c_str(), e.what());
mux_obj->rollbackStateChange();
return true;
}
catch (const std::exception& e)
{
SWSS_LOG_ERROR("Exception caught while setting state %s for port %s. Error: %s",
state.c_str(), port_name.c_str(), e.what());
mux_obj->rollbackStateChange();
return true;
}

Expand Down
4 changes: 2 additions & 2 deletions orchagent/muxorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ class MuxAclHandler
void createMuxAclRule(shared_ptr<AclRulePacket> rule, string strTable);
void bindAllPorts(AclTable &acl_table);

// class shared dict: ACL table name -> ACL table
static std::map<std::string, AclTable> acl_table_;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this removed? i think its changing the overall logic.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

based on my understanding, this was used to cache if the ACL table had already been created in hardware. I changed the behavior to delegate this check to AclOrch instead, since the static scope of this map was preventing the mock_tests from passing. The MuxAclHandler now always asks AclOrch to check if the table already exists, instead of storing that info in MuxAclHandler.

sai_object_id_t port_ = SAI_NULL_OBJECT_ID;
bool is_ingress_acl_ = true;
string alias_;
Expand Down Expand Up @@ -97,6 +95,7 @@ class MuxCable
using state_machine_handlers = map<MuxStateChange, bool (MuxCable::*)()>;

void setState(string state);
void rollbackStateChange();
string getState();
bool isStateChangeInProgress() { return st_chg_in_progress_; }
bool isStateChangeFailed() { return st_chg_failed_; }
Expand All @@ -120,6 +119,7 @@ class MuxCable
MuxCableType cable_type_;

MuxState state_ = MuxState::MUX_STATE_INIT;
MuxState prev_state_;
bool st_chg_in_progress_ = false;
bool st_chg_failed_ = false;

Expand Down
35 changes: 21 additions & 14 deletions orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,12 @@ bool NeighOrch::addNextHop(const NextHopKey &nh)
sai_status_t status = sai_next_hop_api->create_next_hop(&next_hop_id, gSwitchId, (uint32_t)next_hop_attrs.size(), next_hop_attrs.data());
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_ALREADY_EXISTS)
{
SWSS_LOG_NOTICE("Next hop %s on %s already exists",
nexthop.ip_address.to_string().c_str(), nexthop.alias.c_str());
return true;
}
SWSS_LOG_ERROR("Failed to create next hop %s on %s, rv:%d",
nexthop.ip_address.to_string().c_str(), nexthop.alias.c_str(), status);
task_process_status handle_status = handleSaiCreateStatus(SAI_API_NEXT_HOP, status);
Expand Down Expand Up @@ -1014,7 +1020,7 @@ bool NeighOrch::removeNeighbor(const NeighborEntry &neighborEntry, bool disable)
/* When next hop is not found, we continue to remove neighbor entry. */
if (status == SAI_STATUS_ITEM_NOT_FOUND)
{
SWSS_LOG_ERROR("Failed to locate next hop %s on %s, rv:%d",
SWSS_LOG_NOTICE("Next hop %s on %s doesn't exist, rv:%d",
ip_address.to_string().c_str(), alias.c_str(), status);
}
else
Expand Down Expand Up @@ -1049,9 +1055,8 @@ bool NeighOrch::removeNeighbor(const NeighborEntry &neighborEntry, bool disable)
{
if (status == SAI_STATUS_ITEM_NOT_FOUND)
{
SWSS_LOG_ERROR("Failed to locate neighbor %s on %s, rv:%d",
SWSS_LOG_NOTICE("Neighbor %s on %s already removed, rv:%d",
m_syncdNeighbors[neighborEntry].mac.to_string().c_str(), alias.c_str(), status);
return true;
}
else
{
Expand All @@ -1064,22 +1069,24 @@ bool NeighOrch::removeNeighbor(const NeighborEntry &neighborEntry, bool disable)
}
}
}

if (neighbor_entry.ip_address.addr_family == SAI_IP_ADDR_FAMILY_IPV4)
{
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV4_NEIGHBOR);
}
else
{
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV6_NEIGHBOR);
}
if (neighbor_entry.ip_address.addr_family == SAI_IP_ADDR_FAMILY_IPV4)
{
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV4_NEIGHBOR);
}
else
{
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV6_NEIGHBOR);
}

removeNextHop(ip_address, alias);
m_intfsOrch->decreaseRouterIntfsRefCount(alias);
removeNextHop(ip_address, alias);
m_intfsOrch->decreaseRouterIntfsRefCount(alias);
SWSS_LOG_NOTICE("Removed neighbor %s on %s",
m_syncdNeighbors[neighborEntry].mac.to_string().c_str(), alias.c_str());
}
}

SWSS_LOG_NOTICE("Removed neighbor %s on %s",
m_syncdNeighbors[neighborEntry].mac.to_string().c_str(), alias.c_str());

/* Do not delete entry from cache if its disable request */
if (disable)
Expand Down
1 change: 1 addition & 0 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ tests_SOURCES = aclorch_ut.cpp \
flowcounterrouteorch_ut.cpp \
orchdaemon_ut.cpp \
intfsorch_ut.cpp \
mux_rollback_ut.cpp \
warmrestartassist_ut.cpp \
test_failure_handling.cpp \
$(top_srcdir)/lib/gearboxutils.cpp \
Expand Down
3 changes: 3 additions & 0 deletions tests/mock_tests/mock_orchagent_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ extern VRFOrch *gVrfOrch;
extern NhgOrch *gNhgOrch;
extern Srv6Orch *gSrv6Orch;
extern BfdOrch *gBfdOrch;
extern AclOrch *gAclOrch;
extern PolicerOrch *gPolicerOrch;
extern Directory<Orch*> gDirectory;

extern sai_acl_api_t *sai_acl_api;
Expand All @@ -70,6 +72,7 @@ extern sai_route_api_t *sai_route_api;
extern sai_neighbor_api_t *sai_neighbor_api;
extern sai_tunnel_api_t *sai_tunnel_api;
extern sai_next_hop_api_t *sai_next_hop_api;
extern sai_next_hop_group_api_t *sai_next_hop_group_api;
extern sai_hostif_api_t *sai_hostif_api;
extern sai_policer_api_t *sai_policer_api;
extern sai_buffer_api_t *sai_buffer_api;
Expand Down
Loading