Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/DbInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ constexpr auto DEFAULT_TIMEOUT_MSEC = 1000;
std::vector<std::string> DbInterface::mMuxState = {"active", "standby", "unknown", "Error"};
std::vector<std::string> DbInterface::mMuxLinkmgrState = {"uninitialized", "unhealthy", "healthy"};
std::vector<std::string> DbInterface::mMuxMetrics = {"start", "end"};
std::vector<std::string> DbInterface::mLinkProbeMetrics = {"link_prober_unknown_start", "link_prober_unknown_end"};
std::vector<std::string> DbInterface::mLinkProbeMetrics = {"link_prober_unknown_start", "link_prober_unknown_end", "link_prober_wait_start", "link_prober_active_start", "link_prober_standby_start"};

//
// ---> DbInterface(mux::MuxManager *muxManager);
Expand Down Expand Up @@ -168,13 +168,13 @@ void DbInterface::postMetricsEvent(
// link_manager::ActiveStandbyStateMachine::LinkProberMetrics metrics
// );
//
// post link probe pck loss event to state db
// post link probe event to state db
void DbInterface::postLinkProberMetricsEvent(
const std::string &portName,
link_manager::ActiveStandbyStateMachine::LinkProberMetrics metrics
)
{
MUXLOGWARNING(boost::format("%s: posting link prober pck loss event %s") %
MUXLOGWARNING(boost::format("%s: posting link prober event %s") %
portName %
mLinkProbeMetrics[static_cast<int> (metrics)]
);
Expand Down Expand Up @@ -393,7 +393,7 @@ void DbInterface::handlePostLinkProberMetrics(
boost::posix_time::ptime time
)
{
MUXLOGWARNING(boost::format("%s: posting link prober pck loss event %s") %
MUXLOGWARNING(boost::format("%s: posting link prober event %s") %
portName %
mLinkProbeMetrics[static_cast<int> (metrics)]
);
Expand Down
6 changes: 3 additions & 3 deletions src/DbInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ class DbInterface
/**
* @method postLinkProberMetricsEvent
*
* @brief post link prober pck loss event
* @brief post link prober event
*
* @param portName (in) port name
* @param metrics (in) pck loss event name
* @param metrics (in) link prober event name
*
* @return none
*
Expand Down Expand Up @@ -316,7 +316,7 @@ class DbInterface
/**
* @method handlePostLinkProberMetrics
*
* @brief post link prober pck loss event to state db
* @brief post link prober event to state db
*
* @param portName (in) port name
* @param metrics (in) metrics data
Expand Down
11 changes: 11 additions & 0 deletions src/common/MuxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,15 @@ class MuxConfig
*@return IPv4 address
*/
inline boost::asio::ip::address getLoopbackIpv4Address() {return mLoopbackIpv4Address;};

/**
 *@method getDecreasedTimeoutIpv4_msec
 *
 *@brief accessor for the shortened IPv4 LinkProber timeout
 *
 *@return shortened timeout, in msec
 */
inline uint32_t getDecreasedTimeoutIpv4_msec() const
{
    return mDecreasedTimeoutIpv4_msec;
}

private:
uint8_t mNumberOfThreads = 5;
Expand All @@ -277,6 +286,8 @@ class MuxConfig
uint32_t mMuxStateChangeRetryCount = 1;
uint32_t mLinkStateChangeRetryCount = 1;

uint32_t mDecreasedTimeoutIpv4_msec = 10;

std::array<uint8_t, ETHER_ADDR_LEN> mTorMacAddress;
boost::asio::ip::address mLoopbackIpv4Address = boost::asio::ip::make_address("10.212.64.0");
};
Expand Down
9 changes: 9 additions & 0 deletions src/common/MuxPortConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,15 @@ class MuxPortConfig
*/
inline PortCableType getPortCableType() const {return mPortCableType;};

/**
 *@method getDecreasedTimeoutIpv4_msec
 *
 *@brief accessor for the shortened IPv4 LinkProber timeout; the value is read from the shared MuxConfig
 *
 *@return shortened timeout, in msec
 */
inline uint32_t getDecreasedTimeoutIpv4_msec() const
{
    return mMuxConfig.getDecreasedTimeoutIpv4_msec();
}

private:
MuxConfig &mMuxConfig;
std::string mPortName;
Expand Down
23 changes: 20 additions & 3 deletions src/link_manager/LinkManagerStateMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ void ActiveStandbyStateMachine::enterLinkProberState(CompositeState &nextState,
{
mLinkProberStateMachinePtr->enterState(label);
ps(nextState) = label;

// link prober entering wait indicating switchover is initiated, but a switchover can be skipped if mode == manual.
if(label == link_prober::LinkProberState::Label::Wait) {
mMuxPortPtr->postLinkProberMetricsEvent(link_manager::ActiveStandbyStateMachine::LinkProberMetrics::LinkProberWaitStart);
}
}

//
Expand Down Expand Up @@ -332,6 +337,7 @@ void ActiveStandbyStateMachine::switchMuxState(
mMuxStateMachine.setWaitStateCause(mux_state::WaitState::WaitStateCause::SwssUpdate);
mMuxPortPtr->postMetricsEvent(Metrics::SwitchingStart, label);
mMuxPortPtr->setMuxState(label);
mDecreaseIntervalFnPtr(mMuxPortConfig.getLinkWaitTimeout_msec());
mDeadlineTimer.cancel();
startMuxWaitTimer();
} else {
Expand Down Expand Up @@ -380,6 +386,12 @@ void ActiveStandbyStateMachine::handleSwssBladeIpv4AddressUpdate(boost::asio::ip
mResetIcmpPacketCountsFnPtr = boost::bind(
&link_prober::LinkProber::resetIcmpPacketCounts, mLinkProberPtr.get()
);
mDecreaseIntervalFnPtr = boost::bind(
&link_prober::LinkProber::decreaseProbeIntervalAfterSwitch, mLinkProberPtr.get(), boost::placeholders::_1
);
mRevertIntervalFnPtr = boost::bind(
&link_prober::LinkProber::revertProbeIntervalAfterSwitchComplete, mLinkProberPtr.get()
);
mComponentInitState.set(LinkProberComponent);

activateStateMachine();
Expand Down Expand Up @@ -445,13 +457,17 @@ void ActiveStandbyStateMachine::handleStateChange(LinkProberEvent &event, link_p
mLinkProberStateName[state]
);

// update state db link prober metrics to collect pck loss data
// update state db link prober metrics to collect link prober state change data
if (mContinuousLinkProberUnknownEvent == true && state != link_prober::LinkProberState::Unknown) {
mContinuousLinkProberUnknownEvent = false;
mMuxPortPtr->postLinkProberMetricsEvent(link_manager::ActiveStandbyStateMachine::LinkProberMetrics::LinkProberUnknownEnd);
} else if (state == link_prober::LinkProberState::Label::Unknown) {
mContinuousLinkProberUnknownEvent = true;
mMuxPortPtr->postLinkProberMetricsEvent(link_manager::ActiveStandbyStateMachine::LinkProberMetrics::LinkProberUnknownStart);
} else if (state == link_prober::LinkProberState::Label::Active) {
mMuxPortPtr->postLinkProberMetricsEvent(link_manager::ActiveStandbyStateMachine::LinkProberMetrics::LinkProberActiveStart);
} else if (state == link_prober::LinkProberState::Label::Standby) {
mMuxPortPtr->postLinkProberMetricsEvent(link_manager::ActiveStandbyStateMachine::LinkProberMetrics::LinkProberStandbyStart);
}

CompositeState nextState = mCompositeState;
Expand Down Expand Up @@ -834,7 +850,7 @@ void ActiveStandbyStateMachine::handleDefaultRouteStateNotification(const std::s
if (mComponentInitState.test(MuxStateComponent)) {
if (ms(mCompositeState) != mux_state::MuxState::Label::Standby && routeState == "na") {
mSendPeerSwitchCommandFnPtr();
// In case Mux is in wait state, switchMuxSate(standby) will be skipped. Setting mux state in app db to be standby so tunnel can be established.
// In case Mux is in wait state, switchMuxState(standby) will be skipped. Setting mux state in app db to be standby so tunnel can be established.
mMuxPortPtr->setMuxState(mux_state::MuxState::Label::Standby);
} else {
enterMuxWaitState(mCompositeState);
Expand Down Expand Up @@ -883,6 +899,7 @@ void ActiveStandbyStateMachine::updateMuxLinkmgrState()
(ps(mCompositeState) == link_prober::LinkProberState::Label::Standby &&
ms(mCompositeState) == mux_state::MuxState::Label::Standby))) {
label = Label::Healthy;
mRevertIntervalFnPtr();
}

setLabel(label);
Expand Down Expand Up @@ -962,7 +979,7 @@ void ActiveStandbyStateMachine::handleMuxWaitTimeout(boost::system::error_code e
// on the 3rd timeout, send switch active command to peer
if (mMuxWaitTimeoutCount == mMuxPortConfig.getNegativeStateChangeRetryCount()) {
mSendPeerSwitchCommandFnPtr();
// Mux is in wait state, switchMuxSate(standby) will be skipped. Setting mux state in app db to be standby so tunnel can be established.
// Mux is in wait state, switchMuxState(standby) will be skipped. Setting mux state in app db to be standby so tunnel can be established.
mMuxPortPtr->setMuxState(mux_state::MuxState::Label::Standby);
}
} else {
Expand Down
33 changes: 32 additions & 1 deletion src/link_manager/LinkManagerStateMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
// Link prober events posted to state db through postLinkProberMetricsEvent().
// The enumerator value is used as an index into DbInterface::mLinkProbeMetrics
// (mLinkProbeMetrics[static_cast<int> (metrics)]), so the order here must stay in
// step with the order of the event names in that vector.
enum class LinkProberMetrics {
LinkProberUnknownStart,  // link prober state changed to Unknown
LinkProberUnknownEnd,  // link prober left a run of Unknown state
LinkProberWaitStart,  // link prober entered Wait state
LinkProberActiveStart,  // link prober state changed to Active
LinkProberStandbyStart,  // link prober state changed to Standby

Count
};
Expand Down Expand Up @@ -811,6 +814,32 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
*/
void setComponentInitState(uint8_t component) {mComponentInitState.set(component);};

/**
 * @method setDecreaseIntervalFnPtr
 *
 * @brief install a replacement for the callback that shortens the link prober
 *        interval when a switchover starts. This method is used for testing
 *
 * @param DecreaseIntervalFnPtr (in)  callable receiving the switchover time window in msec
 *
 * @return none
 */
void setDecreaseIntervalFnPtr(boost::function<void (uint32_t switchTime_msec)> DecreaseIntervalFnPtr)
{
    mDecreaseIntervalFnPtr = DecreaseIntervalFnPtr;
}

/**
 * @method setRevertIntervalFnPtr
 *
 * @brief install a replacement for the callback that restores the link prober
 *        interval. This method is used for testing
 *
 * @param RevertIntervalFnPtr (in)  callable taking no arguments
 *
 * @return none
 */
void setRevertIntervalFnPtr(boost::function<void ()> RevertIntervalFnPtr)
{
    mRevertIntervalFnPtr = RevertIntervalFnPtr;
}

private:
link_state::LinkState::Label mPeerLinkState = link_state::LinkState::Label::Down;

Expand All @@ -825,6 +854,8 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
boost::function<void ()> mResumeTxFnPtr;
boost::function<void ()> mSendPeerSwitchCommandFnPtr;
boost::function<void ()> mResetIcmpPacketCountsFnPtr;
boost::function<void (uint32_t switchTime_msec)> mDecreaseIntervalFnPtr;
boost::function<void ()> mRevertIntervalFnPtr;

uint32_t mWaitActiveUpCount = 0;
uint32_t mMuxUnknownBackoffFactor = 1;
Expand All @@ -833,7 +864,7 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
bool mPendingMuxModeChange = false;
common::MuxPortConfig::Mode mTargetMuxMode = common::MuxPortConfig::Mode::Auto;

bool mContinuousLinkProberUnknownEvent = false;
bool mContinuousLinkProberUnknownEvent = false; // When posting unknown_end event, we want to make sure the previous state is unknown.

std::bitset<ComponentCount> mComponentInitState = {0};
};
Expand Down
60 changes: 59 additions & 1 deletion src/link_prober/LinkProber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ LinkProber::LinkProber(
mStrand(mIoService),
mDeadlineTimer(mIoService),
mSuspendTimer(mIoService),
mSwitchoverTimer(mIoService),
mStream(mIoService)
{
try {
Expand Down Expand Up @@ -532,7 +533,7 @@ void LinkProber::startTimer()
{
MUXLOGDEBUG(mMuxPortConfig.getPortName());
// time out these heartbeats
mDeadlineTimer.expires_from_now(boost::posix_time::milliseconds(mMuxPortConfig.getTimeoutIpv4_msec()));
mDeadlineTimer.expires_from_now(boost::posix_time::milliseconds(getProbingInterval()));
mDeadlineTimer.async_wait(mStrand.wrap(boost::bind(
&LinkProber::handleTimeout,
this,
Expand Down Expand Up @@ -748,4 +749,61 @@ void LinkProber::resetIcmpPacketCounts()
)));
}

//
// ---> decreaseProbeIntervalAfterSwitch(uint32_t switchTime_msec);
//
// adjust link prober interval to 10 ms after switchover to better measure the switchover overhead.
//
void LinkProber::decreaseProbeIntervalAfterSwitch(uint32_t switchTime_msec)
{
MUXLOGDEBUG(mMuxPortConfig.getPortName());

mSwitchoverTimer.expires_from_now(boost::posix_time::milliseconds(switchTime_msec));
mSwitchoverTimer.async_wait(mStrand.wrap(boost::bind(
&LinkProber::handleSwitchoverTimeout,
this,
boost::asio::placeholders::error
)));

mDecreaseProbingInterval = true;
}

//
// ---> revertProbeIntervalAfterSwitchComplete();
//
// revert probe interval change after switchover is completed
//
void LinkProber::revertProbeIntervalAfterSwitchComplete()
{
MUXLOGDEBUG(mMuxPortConfig.getPortName());

// the switchover finished, so the pending switchover timer wait is no longer needed
mSwitchoverTimer.cancel();
// getProbingInterval() goes back to the regular IPv4 timeout
mDecreaseProbingInterval = false;
}

//
// ---> handleSwitchoverTimeout(boost::system::error_code errorCode)
//
// handle switchover time out
//
// errorCode is the completion status of the switchover timer wait. Only a genuine
// expiry (success) ends the decreased-interval window. A wait that completes with
// any other status was cancelled: either revertProbeIntervalAfterSwitchComplete()
// already cleared the flag itself, or decreaseProbeIntervalAfterSwitch() re-armed
// the timer for a new switchover, in which case clearing the flag here would
// wrongly switch the new window back to the regular probing interval.
//
void LinkProber::handleSwitchoverTimeout(boost::system::error_code errorCode)
{
    MUXLOGDEBUG(mMuxPortConfig.getPortName());

    if (errorCode == boost::system::errc::success) {
        // the expected ICMP event never arrived in time: fall back to regular probing
        mDecreaseProbingInterval = false;
        MUXLOGWARNING(boost::format("%s: link prober timeout on waiting for expected ICMP event after switchover is triggered ") % mMuxPortConfig.getPortName());
    }
}

//
// ---> getProbingInterval
//
// get link prober interval in msec: the decreased timeout while the
// decreased-interval flag is set, the regular IPv4 timeout otherwise
//
inline uint32_t LinkProber::getProbingInterval()
{
    MUXLOGDEBUG(mMuxPortConfig.getPortName());

    if (mDecreaseProbingInterval) {
        return mMuxPortConfig.getDecreasedTimeoutIpv4_msec();
    }

    return mMuxPortConfig.getTimeoutIpv4_msec();
}

} /* namespace link_prober */
43 changes: 43 additions & 0 deletions src/link_prober/LinkProber.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,27 @@ class LinkProber
*/
void resetIcmpPacketCounts();

/**
 * @method decreaseProbeIntervalAfterSwitch
 *
 * @brief adjust link prober interval to the decreased timeout (10 ms by default) after switchover to better measure the switchover overhead.
 *
 * @param switchTime_msec (in) switchover is expected to complete within this time window
 *
 * @return none
 */
void decreaseProbeIntervalAfterSwitch(uint32_t switchTime_msec);

/**
 * @method revertProbeIntervalAfterSwitchComplete
 *
 * @brief revert probe interval change after switchover is completed; also cancels the pending switchover timer
 *
 * @return none
 */
void revertProbeIntervalAfterSwitchComplete();

private:
/**
*@method handleUpdateEthernetFrame
Expand Down Expand Up @@ -408,6 +429,26 @@ class LinkProber
*@return the appended TLV size
*/
size_t appendTlvDummy(size_t paddingSize, int seqNo);

/**
 * @method getProbingInterval
 *
 * @brief get link prober interval: the decreased timeout while the probing interval is decreased, the regular IPv4 timeout otherwise
 *
 * @return link prober interval in msec
 */
inline uint32_t getProbingInterval();

/**
 * @method handleSwitchoverTimeout
 *
 * @brief handle switchover time out
 *
 * @param errorCode (in) completion status of the switchover timer wait
 *
 * @return none
 */
void handleSwitchoverTimeout(boost::system::error_code errorCode);

friend class test::LinkProberTest;

Expand All @@ -432,6 +473,7 @@ class LinkProber
boost::asio::io_service::strand mStrand;
boost::asio::deadline_timer mDeadlineTimer;
boost::asio::deadline_timer mSuspendTimer;
boost::asio::deadline_timer mSwitchoverTimer;
boost::asio::posix::stream_descriptor mStream;

std::shared_ptr<SockFilter> mSockFilterPtr;
Expand All @@ -444,6 +486,7 @@ class LinkProber
std::array<uint8_t, MUX_MAX_ICMP_BUFFER_SIZE> mRxBuffer;

bool mSuspendTx = false;
bool mDecreaseProbingInterval = false;

uint64_t mIcmpUnknownEventCount = 0;
uint64_t mIcmpPacketCount = 0;
Expand Down
Loading