Skip to content

Commit 2167244

Browse files
committed
Merge branch 'dash' into dash_acl
2 parents 69d0d7a + ab29920 commit 2167244

19 files changed

Lines changed: 604 additions & 52 deletions

cfgmgr/Makefile.am

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ dist_cfgmgr_DATA = \
1515
buffer_pool_mellanox.lua \
1616
buffer_check_headroom_vs.lua \
1717
buffer_headroom_vs.lua \
18-
buffer_pool_vs.lua
18+
buffer_pool_vs.lua \
19+
buffer_check_headroom_barefoot.lua \
20+
buffer_headroom_barefoot.lua \
21+
buffer_pool_barefoot.lua
1922

2023
if DEBUG
2124
DBGFLAGS = -ggdb -DDEBUG
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
-- For Barefoot platforms the shared headroom pool model is supported, so
-- there is no per-port accumulated headroom limit to validate; always
-- report success to the caller.
local result = {
    "result:true",
    "debug:No need to check port headroom limit as shared headroom pool model is supported.",
}

return result
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
-- KEYS - profile name
-- ARGV[1] - port speed
-- ARGV[2] - cable length
-- ARGV[3] - port mtu
-- ARGV[4] - gearbox delay

-- Parameters retrieved from databases:
-- From CONFIG_DB.LOSSLESS_TRAFFIC_PATTERN
--     small packet percentage: the parameter which is used to control worst case regarding the cell utilization
--     mtu: the mtu of lossless packet
-- From STATE_DB.ASIC_TABLE:
--     cell size: cell_size of the ASIC
--     pipeline_latency: the latency (XON)
--     mac_phy_delay: the bytes held in the switch chip's egress pipeline and PHY when XOFF has been generated
--     peer_response_time: the bytes that are held in the peer switch's pipeline and will be sent out when the XOFF packet is received

local lossless_mtu
local small_packet_percentage
local cell_size
local pipeline_latency
local mac_phy_delay
local peer_response_time

local port_speed = tonumber(ARGV[1])
-- The cable length argument carries a trailing unit character (e.g. "40m");
-- strip the last character before parsing the number.
local cable_length = tonumber(string.sub(ARGV[2], 1, -2))
local port_mtu = tonumber(ARGV[3])
local gearbox_delay = tonumber(ARGV[4])

-- Database indexes used with SELECT below.
local config_db = "4"
local state_db = "6"

local ret = {}

-- Pause quanta that should be taken for each operating speed is defined in IEEE 802.3 31B.3.7.
-- The key of table pause_quanta_per_speed is the operating speed at Mb/s.
-- The value of table pause_quanta_per_speed is the number of pause_quanta.
local pause_quanta_per_speed = {}
pause_quanta_per_speed[400000] = 905
pause_quanta_per_speed[200000] = 453
pause_quanta_per_speed[100000] = 394
pause_quanta_per_speed[50000] = 147
pause_quanta_per_speed[40000] = 118
pause_quanta_per_speed[25000] = 80
pause_quanta_per_speed[10000] = 67
pause_quanta_per_speed[1000] = 2
pause_quanta_per_speed[100] = 1

-- Get pause_quanta from the pause_quanta_per_speed table
local pause_quanta = pause_quanta_per_speed[port_speed]

if gearbox_delay == nil then
    gearbox_delay = 0
end

-- Fetch ASIC info from ASIC table in STATE_DB
redis.call("SELECT", state_db)
local asic_keys = redis.call("KEYS", "ASIC_TABLE*")

-- Only one key should exist
local asic_table_content = redis.call("HGETALL", asic_keys[1])

for i = 1, #asic_table_content, 2 do
    if asic_table_content[i] == "cell_size" then
        cell_size = tonumber(asic_table_content[i+1])
    end
    if asic_table_content[i] == "pipeline_latency" then
        pipeline_latency = tonumber(asic_table_content[i+1]) * 1024
    end
    if asic_table_content[i] == "mac_phy_delay" then
        mac_phy_delay = tonumber(asic_table_content[i+1]) * 1024
    end
    -- If we failed to get pause_quanta from the table, then use the default
    -- peer_response_time stored in state_db
    if asic_table_content[i] == "peer_response_time" and pause_quanta == nil then
        peer_response_time = tonumber(asic_table_content[i+1]) * 1024
    end
end

-- Fetch lossless traffic info from CONFIG_DB
redis.call("SELECT", config_db)
local lossless_traffic_keys = redis.call("KEYS", "LOSSLESS_TRAFFIC_PATTERN*")

-- Only one key should exist
local lossless_traffic_table_content = redis.call("HGETALL", lossless_traffic_keys[1])
for i = 1, #lossless_traffic_table_content, 2 do
    if lossless_traffic_table_content[i] == "mtu" then
        lossless_mtu = tonumber(lossless_traffic_table_content[i+1])
    end
    if lossless_traffic_table_content[i] == "small_packet_percentage" then
        small_packet_percentage = tonumber(lossless_traffic_table_content[i+1])
    end
end

-- NOTE: the shared headroom pool size ("xoff" of ingress_lossless_pool) is not
-- needed here: with the shared headroom pool model the xoff portion of the
-- headroom comes from the pool, so only xon contributes to the reserved size.

-- Calculate the headroom information
local speed_of_light = 198000000
local minimal_packet_size = 64
local cell_occupancy
local worst_case_factor
local propagation_delay
local bytes_on_cable
local bytes_on_gearbox
local xoff_value
local xon_value
local headroom_size

-- Worst-case cell waste: large cells are dominated by minimum-size packets,
-- small cells by the per-packet rounding overhead.
if cell_size > 2 * minimal_packet_size then
    worst_case_factor = cell_size / minimal_packet_size
else
    worst_case_factor = (2 * cell_size) / (1 + cell_size)
end

cell_occupancy = (100 - small_packet_percentage + small_packet_percentage * worst_case_factor) / 100

if (gearbox_delay == 0) then
    bytes_on_gearbox = 0
else
    bytes_on_gearbox = port_speed * gearbox_delay / (8 * 1024)
end

-- If we successfully got pause_quanta from the table, then calculate
-- peer_response_time from it (one pause quantum is 512 bit times)
if pause_quanta ~= nil then
    peer_response_time = (pause_quanta) * 512 / 8
end

if port_speed == 400000 then
    peer_response_time = 2 * peer_response_time
end

-- Round-trip bytes in flight on the cable plus everything buffered in the
-- pipeline on both sides while XOFF propagates.
bytes_on_cable = 2 * cable_length * port_speed * 1000000000 / speed_of_light / (8 * 1024)
propagation_delay = port_mtu + bytes_on_cable + 2 * bytes_on_gearbox + mac_phy_delay + peer_response_time

-- Calculate the xoff and xon and then round up at 1024 bytes
xoff_value = lossless_mtu + propagation_delay * cell_occupancy
xoff_value = math.ceil(xoff_value / 1024) * 1024
xon_value = pipeline_latency
xon_value = math.ceil(xon_value / 1024) * 1024

-- With the shared headroom pool model the xoff portion is absorbed by the
-- pool, so the per-PG reserved size only needs to cover the xon threshold.
headroom_size = xon_value
headroom_size = math.ceil(headroom_size / 1024) * 1024

table.insert(ret, "xon" .. ":" .. math.ceil(xon_value))
table.insert(ret, "xoff" .. ":" .. math.ceil(xoff_value))
table.insert(ret, "size" .. ":" .. math.ceil(headroom_size))

return ret

cfgmgr/buffer_pool_barefoot.lua

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
-- KEYS - None
-- ARGV - None

local pool_info = {}
local CONFIG_DB = "4"
local STATE_DB = "6"

-- Look up the ASIC cell size from STATE_DB (a single ASIC_TABLE key is expected).
redis.call("SELECT", STATE_DB)
local asic_table_keys = redis.call("KEYS", "ASIC_TABLE*")
local cell_bytes = tonumber(redis.call("HGET", asic_table_keys[1], "cell_size"))

-- Singular per-PG headroom derived from the cell size.
local per_ppg_headroom = 400 * cell_bytes

-- Count the configured ports in CONFIG_DB.
redis.call("SELECT", CONFIG_DB)
local port_keys = redis.call("KEYS", "PORT|*")

-- Shared headroom pool: 2 PPGs per port, at 70% of the possible maximum value.
local shp_size = math.ceil(#port_keys * 2 * per_ppg_headroom * 0.7)

-- The buffer pool sizes themselves are fixed; read them back from CONFIG_DB.
local lossless_pool_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'size'))
local ingress_lossy_pool_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossy_pool', 'size'))
local egress_lossy_pool_size = tonumber(redis.call('HGET', 'BUFFER_POOL|egress_lossy_pool', 'size'))

-- Only the lossless pool gets the shared headroom pool size appended.
pool_info[#pool_info + 1] = "ingress_lossless_pool" .. ":" .. lossless_pool_size .. ":" .. shp_size
pool_info[#pool_info + 1] = "ingress_lossy_pool" .. ":" .. ingress_lossy_pool_size
pool_info[#pool_info + 1] = "egress_lossy_pool" .. ":" .. egress_lossy_pool_size

return pool_info

cfgmgr/buffermgr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ task_process_status BufferMgr::doSpeedUpdateTask(string port)
196196
// Although we have up to 8 PGs for now, the range to check is expanded to 32 to support more PGs
197197
set<string> lossless_pg_combinations = generateIdListFromMap(lossless_pg_id, sizeof(lossless_pg_id));
198198

199-
if (m_portStatusLookup[port] == "down" && m_platform == "mellanox")
199+
if (m_portStatusLookup[port] == "down" && (m_platform == "mellanox" || m_platform == "barefoot"))
200200
{
201201
for (auto lossless_pg : lossless_pg_combinations)
202202
{

cfgmgr/vlanmgr.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,11 @@ bool VlanMgr::addHostVlan(int vlan_id)
134134
std::string res;
135135
EXEC_WITH_ERROR_THROW(cmds, res);
136136

137+
res.clear();
138+
const std::string echo_cmd = std::string("")
139+
+ ECHO_CMD + " 0 > /proc/sys/net/ipv4/conf/" + VLAN_PREFIX + std::to_string(vlan_id) + "/arp_evict_nocarrier";
140+
swss::exec(echo_cmd, res);
141+
137142
return true;
138143
}
139144

orchagent/fabricportsorch.cpp

Lines changed: 95 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,18 @@
1111
#include "timer.h"
1212

1313
#define FABRIC_POLLING_INTERVAL_DEFAULT (30)
14+
#define FABRIC_PORT_PREFIX "PORT"
1415
#define FABRIC_PORT_ERROR 0
1516
#define FABRIC_PORT_SUCCESS 1
1617
#define FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_PORT_STAT_COUNTER"
1718
#define FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 10000
1819
#define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER"
1920
#define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000
20-
#define FABRIC_PORT_TABLE "FABRIC_PORT_TABLE"
2121

2222
extern sai_object_id_t gSwitchId;
2323
extern sai_switch_api_t *sai_switch_api;
2424
extern sai_port_api_t *sai_port_api;
25+
extern sai_queue_api_t *sai_queue_api;
2526

2627
const vector<sai_port_stat_t> port_stat_ids =
2728
{
@@ -42,7 +43,8 @@ static const vector<sai_queue_stat_t> queue_stat_ids =
4243
SAI_QUEUE_STAT_CURR_OCCUPANCY_LEVEL,
4344
};
4445

45-
FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames) :
46+
FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pri_t> &tableNames,
47+
bool fabricPortStatEnabled, bool fabricQueueStatEnabled) :
4648
Orch(appl_db, tableNames),
4749
port_stat_manager(FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ,
4850
FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true),
@@ -55,14 +57,17 @@ FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pr
5557
SWSS_LOG_NOTICE( "FabricPortsOrch constructor" );
5658

5759
m_state_db = shared_ptr<DBConnector>(new DBConnector("STATE_DB", 0));
58-
m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), FABRIC_PORT_TABLE));
60+
m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), APP_FABRIC_PORT_TABLE_NAME));
5961

6062
m_counter_db = shared_ptr<DBConnector>(new DBConnector("COUNTERS_DB", 0));
61-
m_laneQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_NAME_MAP));
62-
m_lanePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_QUEUE_PORT_MAP));
63+
m_portNameQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_FABRIC_QUEUE_NAME_MAP));
64+
m_portNamePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_FABRIC_PORT_NAME_MAP));
6365

6466
m_flex_db = shared_ptr<DBConnector>(new DBConnector("FLEX_COUNTER_DB", 0));
65-
m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), FABRIC_PORT_TABLE));
67+
m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), APP_FABRIC_PORT_TABLE_NAME));
68+
69+
m_fabricPortStatEnabled = fabricPortStatEnabled;
70+
m_fabricQueueStatEnabled = fabricQueueStatEnabled;
6671

6772
getFabricPortList();
6873

@@ -147,32 +152,96 @@ bool FabricPortsOrch::allPortsReady()
147152

148153
void FabricPortsOrch::generatePortStats()
149154
{
150-
// FIX_ME: This function installs flex counters for port stats
151-
// on fabric ports for fabric asics and voq asics (that connect
152-
// to fabric asics via fabric ports). These counters will be
153-
// installed in FLEX_COUNTER_DB, and queried by syncd and updated
154-
// to COUNTERS_DB.
155-
// However, currently BCM SAI doesn't update its code to query
156-
// port stats (metrics in list port_stat_ids) yet.
157-
// Also, BCM sets too low value for "Max logical port count" (256),
158-
// causing syncd to crash on voq asics that now include regular front
159-
// panel ports, fabric ports, and multiple logical ports.
160-
// So, this function will just do nothing for now, and we will readd
161-
// code to install port stats counters when BCM completely supports.
155+
if (!m_fabricPortStatEnabled) return;
156+
157+
SWSS_LOG_NOTICE("Generate fabric port stats");
158+
159+
vector<FieldValueTuple> portNamePortCounterMap;
160+
for (auto p : m_fabricLanePortMap)
161+
{
162+
int lane = p.first;
163+
sai_object_id_t port = p.second;
164+
165+
std::ostringstream portName;
166+
portName << FABRIC_PORT_PREFIX << lane;
167+
portNamePortCounterMap.emplace_back(portName.str(), sai_serialize_object_id(port));
168+
169+
// Install flex counters for port stats
170+
std::unordered_set<std::string> counter_stats;
171+
for (const auto& it: port_stat_ids)
172+
{
173+
counter_stats.emplace(sai_serialize_port_stat(it));
174+
}
175+
port_stat_manager.setCounterIdList(port, CounterType::PORT, counter_stats);
176+
}
177+
m_portNamePortCounterTable->set("", portNamePortCounterMap);
162178
}
163179

164180
void FabricPortsOrch::generateQueueStats()
165181
{
182+
if (!m_fabricQueueStatEnabled) return;
166183
if (m_isQueueStatsGenerated) return;
167184
if (!m_getFabricPortListDone) return;
168185

169-
// FIX_ME: Similar to generatePortStats(), generateQueueStats() installs
170-
// flex counters for queue stats on fabric ports for fabric asics and voq asics.
171-
// However, currently BCM SAI doesn't fully support queue stats query.
172-
// Query on queue type and index is not supported for fabric asics while
173-
// voq asics are not completely supported.
174-
// So, this function will just do nothing for now, and we will readd
175-
// code to install queue stats counters when BCM completely supports.
186+
SWSS_LOG_NOTICE("Generate queue map for fabric ports");
187+
188+
sai_status_t status;
189+
sai_attribute_t attr;
190+
191+
for (auto p : m_fabricLanePortMap)
192+
{
193+
int lane = p.first;
194+
sai_object_id_t port = p.second;
195+
196+
// Each serdes has some pipes (queues) for unicast and multicast.
197+
// But normally fabric serdes uses only one pipe.
198+
attr.id = SAI_PORT_ATTR_QOS_NUMBER_OF_QUEUES;
199+
status = sai_port_api->get_port_attribute(port, 1, &attr);
200+
if (status != SAI_STATUS_SUCCESS)
201+
{
202+
throw runtime_error("FabricPortsOrch get port queue number failure");
203+
}
204+
int num_queues = attr.value.u32;
205+
206+
if (num_queues > 0)
207+
{
208+
vector<sai_object_id_t> m_queue_ids;
209+
m_queue_ids.resize(num_queues);
210+
211+
attr.id = SAI_PORT_ATTR_QOS_QUEUE_LIST;
212+
attr.value.objlist.count = (uint32_t) num_queues;
213+
attr.value.objlist.list = m_queue_ids.data();
214+
215+
status = sai_port_api->get_port_attribute(port, 1, &attr);
216+
if (status != SAI_STATUS_SUCCESS)
217+
{
218+
throw runtime_error("FabricPortsOrch get port queue list failure");
219+
}
220+
221+
// Maintain queue map and install flex counters for queue stats
222+
vector<FieldValueTuple> portNameQueueMap;
223+
224+
// Fabric serdes queue type is SAI_QUEUE_TYPE_FABRIC_TX. Since we always
225+
// maintain only one queue for fabric serdes, m_queue_ids size is 1.
226+
// And so, there is no need to query SAI_QUEUE_ATTR_TYPE and SAI_QUEUE_ATTR_INDEX
227+
// for queue. Actually, SAI does not support query these attributes on fabric serdes.
228+
int queueIndex = 0;
229+
std::ostringstream portName;
230+
portName << FABRIC_PORT_PREFIX << lane << ":" << queueIndex;
231+
const auto queue = sai_serialize_object_id(m_queue_ids[queueIndex]);
232+
portNameQueueMap.emplace_back(portName.str(), queue);
233+
234+
// We collect queue counters like occupancy level
235+
std::unordered_set<string> counter_stats;
236+
for (const auto& it: queue_stat_ids)
237+
{
238+
counter_stats.emplace(sai_serialize_queue_stat(it));
239+
}
240+
queue_stat_manager.setCounterIdList(m_queue_ids[queueIndex], CounterType::QUEUE, counter_stats);
241+
242+
m_portNameQueueCounterTable->set("", portNameQueueMap);
243+
}
244+
}
176245

177246
m_isQueueStatsGenerated = true;
178247
}
@@ -199,7 +268,7 @@ void FabricPortsOrch::updateFabricPortState()
199268
int lane = p.first;
200269
sai_object_id_t port = p.second;
201270

202-
string key = "PORT" + to_string(lane);
271+
string key = FABRIC_PORT_PREFIX + to_string(lane);
203272
std::vector<FieldValueTuple> values;
204273
uint32_t remote_peer;
205274
uint32_t remote_port;

0 commit comments

Comments
 (0)