Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion cfgmgr/nbrmgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ NbrMgr::NbrMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, con
m_statePortTable(stateDb, STATE_PORT_TABLE_NAME),
m_stateLagTable(stateDb, STATE_LAG_TABLE_NAME),
m_stateVlanTable(stateDb, STATE_VLAN_TABLE_NAME),
m_stateIntfTable(stateDb, STATE_INTERFACE_TABLE_NAME)
m_stateIntfTable(stateDb, STATE_INTERFACE_TABLE_NAME),
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME)
{
int err = 0;

Expand Down Expand Up @@ -91,6 +92,19 @@ bool NbrMgr::isIntfStateOk(const string &alias)
return false;
}

bool NbrMgr::isNeighRestoreDone()
{
string value;

m_stateNeighRestoreTable.hget("Flags", "restored", value);
if (value == "true")
{
SWSS_LOG_INFO("Kernel neighbor table restore is done");
return true;
}
return false;
}

bool NbrMgr::setNeighbor(const string& alias, const IpAddress& ip, const MacAddress& mac)
{
SWSS_LOG_ENTER();
Expand Down
4 changes: 3 additions & 1 deletion cfgmgr/nbrmgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ class NbrMgr : public Orch
NbrMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, const vector<string> &tableNames);
using Orch::doTask;

bool isNeighRestoreDone();

private:
bool isIntfStateOk(const string &alias);
bool setNeighbor(const string& alias, const IpAddress& ip, const MacAddress& mac);

void doTask(Consumer &consumer);

Table m_statePortTable, m_stateLagTable, m_stateVlanTable, m_stateIntfTable;
Table m_statePortTable, m_stateLagTable, m_stateVlanTable, m_stateIntfTable, m_stateNeighRestoreTable;
struct nl_sock *m_nl_sock;
};

Expand Down
19 changes: 19 additions & 0 deletions cfgmgr/nbrmgrd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <mutex>
#include <fstream>
#include <iostream>
#include <chrono>

#include "select.h"
#include "exec.h"
Expand All @@ -12,6 +13,9 @@
using namespace std;
using namespace swss;

#define RESTORE_NEIGH_WAIT_TIME_OUT 120
#define RESTORE_NEIGH_WAIT_TIME_INT 10

/* select() function timeout retry time, in millisecond */
#define SELECT_TIMEOUT 1000

Expand Down Expand Up @@ -50,6 +54,21 @@ int main(int argc, char **argv)

NbrMgr nbrmgr(&cfgDb, &appDb, &stateDb, cfg_nbr_tables);

chrono::steady_clock::time_point starttime = chrono::steady_clock::now();
while (!nbrmgr.isNeighRestoreDone())
{
chrono::duration<double> time_span = chrono::duration_cast<chrono::duration<double>>
(chrono::steady_clock::now() - starttime);
int pasttime = int(time_span.count());
SWSS_LOG_INFO("Kernel neighbor table restoration waited for %d seconds", pasttime);
if (pasttime > RESTORE_NEIGH_WAIT_TIME_OUT)
{
SWSS_LOG_WARN("Kernel neighbor table restore is not finished!");
break;
}
sleep(RESTORE_NEIGH_WAIT_TIME_INT);
}

std::vector<Orch *> cfgOrchList = {&nbrmgr};

swss::Select s;
Expand Down
57 changes: 47 additions & 10 deletions neighsyncd/restore_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,29 @@
from scapy.all import conf, in6_getnsma, inet_pton, inet_ntop, in6_getnsmac, get_if_hwaddr, Ether, ARP, IPv6, ICMPv6ND_NS, ICMPv6NDOptSrcLLAddr
from swsscommon import swsscommon
import errno
import syslog

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
logger.addHandler(logging.NullHandler())

SYSLOG_IDENTIFIER = 'restore_neighbor'

def log_info(msg):
syslog.openlog(SYSLOG_IDENTIFIER)
syslog.syslog(syslog.LOG_INFO, msg)
syslog.closelog()

def log_warning(msg):
syslog.openlog(SYSLOG_IDENTIFIER)
syslog.syslog(syslog.LOG_WARNING, msg)
syslog.closelog()

def log_error(msg):
syslog.openlog(SYSLOG_IDENTIFIER)
syslog.syslog(syslog.LOG_ERR, msg)
syslog.closelog()

# timeout the restore process in 110 seconds if not finished
# This is mostly to wait for interfaces to be created and up after system warm-reboot
# and this process is started by supervisord in swss docker.
Expand Down Expand Up @@ -58,12 +76,27 @@ def is_intf_oper_state_up(intf):
state_file = open(oper_file.format(intf), 'r')
state = state_file.readline().rstrip()
except Exception as e:
logger.info('Error: {}'.format(str(e)))
log_info('Error: {}'.format(str(e)))
return False
if state == '1':
return True
return False

def is_intf_up(intf, db):
if not is_intf_oper_state_up(intf):
return False
if 'Vlan' in intf:
table_name = 'VLAN_MEMBER_TABLE|{}|*'.format(intf)
key = db.keys(db.STATE_DB, table_name)
if key is None:
log_info ("Vlan member is not yet created")
return False
if is_intf_up.counter == 0:
time.sleep(3*CHECK_INTERVAL)
is_intf_up.counter = 1
log_info ("intf {} is up".format(intf))
return True

# read the neigh table from AppDB to memory, format as below
# build map as below, this can efficiently access intf and family groups later
# { intf1 -> { { family1 -> [[ip1, mac1], [ip2, mac2] ...] }
Expand Down Expand Up @@ -131,7 +164,7 @@ def read_neigh_table_to_maps():

# Use netlink to set neigh table into kernel, not overwrite the existing ones
def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac):
logging.info('Add neighbor entries: family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
log_info('Add neighbor entries: family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
family, intf_idx, dst_ip, dmac))

if family not in ip_family:
Expand All @@ -152,7 +185,7 @@ def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac):
# If neigh exists, log it but no exception raise, other exceptions, raise
except NetlinkError as e:
if e[0] == errno.EEXIST:
logger.warning('Neigh exists in kernel with family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
log_warning('Neigh exists in kernel with family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
family, intf_idx, dst_ip, dmac))
else:
raise
Expand Down Expand Up @@ -196,10 +229,13 @@ def restore_update_kernel_neighbors(intf_neigh_map, timeout=DEF_TIME_OUT):
ipclass = IPRoute()
mtime = monotonic.time.time
start_time = mtime()
is_intf_up.counter = 0
db = swsssdk.SonicV2Connector(host='127.0.0.1')
db.connect(db.STATE_DB, False)
while (mtime() - start_time) < timeout:
for intf, family_neigh_map in intf_neigh_map.items():
# only try to restore to kernel when link is up
if is_intf_oper_state_up(intf):
if is_intf_up(intf, db):
src_mac = get_if_hwaddr(intf)
intf_idx = ipclass.link_lookup(ifname=intf)[0]
# create socket per intf to send packets
Expand All @@ -215,6 +251,8 @@ def restore_update_kernel_neighbors(intf_neigh_map, timeout=DEF_TIME_OUT):
# use netlink to set neighbor entries
set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac)

log_info('Sending Neigh with family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
family, intf_idx, dst_ip, dmac))
# sending arp/ns packet to update kernel neigh info
s.send(build_arp_ns_pkt(family, src_mac, src_ip, dst_ip))
# delete this family on the intf
Expand All @@ -229,28 +267,27 @@ def restore_update_kernel_neighbors(intf_neigh_map, timeout=DEF_TIME_OUT):
if not intf_neigh_map:
break
time.sleep(CHECK_INTERVAL)
db.close(db.STATE_DB)


def main():

print "restore_neighbors service is started"

log_info ("restore_neighbors service is started")
# Use warmstart python binding to check warmstart information
warmstart = swsscommon.WarmStart()
warmstart.initialize("neighsyncd", "swss")
warmstart.checkWarmStart("neighsyncd", "swss", False)

# if swss or system warm reboot not enabled, don't run
if not warmstart.isWarmStart():
print "restore_neighbors service is skipped as warm restart not enabled"
log_info ("restore_neighbors service is skipped as warm restart not enabled")
return

# swss restart not system warm reboot, set statedb directly
if not warmstart.isSystemWarmRebootEnabled():
set_statedb_neigh_restore_done()
print "restore_neighbors service is done as system warm reboot not enabled"
log_info ("restore_neighbors service is done as system warm reboot not enabled")
return

# read the neigh table from appDB to internal map
try:
intf_neigh_map = read_neigh_table_to_maps()
Expand All @@ -266,7 +303,7 @@ def main():

# set statedb to signal other processes like neighsyncd
set_statedb_neigh_restore_done()
print "restore_neighbor service is done for system warmreboot"
log_info ("restore_neighbor service is done for system warmreboot")
return

if __name__ == '__main__':
Expand Down