Skip to content

Commit 5fa2329

Browse files
authored
Support shared headroom pool on top of dynamic buffer calculation (#1581)
* Support shared headroom pool on top of dynamic buffer calculation - Feature is enabled/disabled on-the-fly by configuring over-subscribe-ratio and shared headroom pool size. If both are configured, the shared headroom pool size will take effect. When turning the feature on or off, all the lossless profiles and buffer pool sizes will be recalculated. - Support calculating shared headroom pool while the ingress lossless pool is statically configured. - Check accumulative headroom before toggling SHP state. Disabling SHP results in the size of each PG increasing; hence we need to check whether the accumulative headroom exceeds the limit. - Split the function doUpdateStaticProfileTask into two functions. Originally it was called for static profiles only and consisted of two parts: - One is for dynamic th updates. It will go over all the buffer profiles dynamically generated according to the dynamic th and update them. - The other is for size updates. It will go over each port referencing the profile and check whether the accumulative headroom exceeds the limit. Now that it is also called by the shared headroom pool logic, we split it into two functions to make it clearer. Signed-off-by: Stephen Sun <[email protected]> How I verified it: ran the vs test and regression test.
1 parent 1438a70 commit 5fa2329

File tree

5 files changed

+465
-37
lines changed

5 files changed

+465
-37
lines changed

cfgmgr/buffer_headroom_mellanox.lua

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
local lossless_mtu
1818
local small_packet_percentage
19+
local over_subscribe_ratio = 0
1920
local cell_size
2021
local pipeline_latency
2122
local mac_phy_delay
@@ -72,8 +73,19 @@ for i = 1, #lossless_traffic_table_content, 2 do
7273
end
7374
end
7475

75-
-- Fetch DEFAULT_LOSSLESS_BUFFER_PARAMETER from CONFIG_DB
76-
local lossless_traffic_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*')
76+
-- Fetch over subscribe ratio
77+
local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*')
78+
local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio'))
79+
80+
-- Fetch the shared headroom pool size
81+
local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff'))
82+
83+
local shp_enabled
84+
if shp_size ~= nil and shp_size ~= 0 or over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then
85+
shp_enabled = true
86+
else
87+
shp_enabled = false
88+
end
7789

7890
-- Calculate the headroom information
7991
local speed_of_light = 198000000
@@ -119,7 +131,11 @@ xoff_value = math.ceil(xoff_value / 1024) * 1024
119131
xon_value = pipeline_latency
120132
xon_value = math.ceil(xon_value / 1024) * 1024
121133

122-
headroom_size = xoff_value + xon_value + speed_overhead
134+
if shp_enabled then
135+
headroom_size = xon_value
136+
else
137+
headroom_size = xoff_value + xon_value + speed_overhead
138+
end
123139
headroom_size = math.ceil(headroom_size / 1024) * 1024
124140

125141
table.insert(ret, "xon" .. ":" .. math.ceil(xon_value))

cfgmgr/buffer_pool_mellanox.lua

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,24 @@ end
8383

8484
local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size')
8585

86+
-- Whether shared headroom pool is enabled?
87+
local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*')
88+
local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio'))
89+
90+
-- Fetch the shared headroom pool size
91+
local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff'))
92+
93+
local shp_enabled = false
94+
if over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then
95+
shp_enabled = true
96+
end
97+
98+
if shp_size ~= nil and shp_size ~= 0 then
99+
shp_enabled = true
100+
else
101+
shp_size = 0
102+
end
103+
86104
-- Switch to APPL_DB
87105
redis.call('SELECT', appl_db)
88106

@@ -103,6 +121,7 @@ local statistics = {}
103121

104122
-- Fetch sizes of all of the profiles, accumulate them
105123
local accumulative_occupied_buffer = 0
124+
local accumulative_xoff = 0
106125
for i = 1, #profiles, 1 do
107126
if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then
108127
local size = tonumber(redis.call('HGET', profiles[i][1], 'size'))
@@ -114,6 +133,13 @@ for i = 1, #profiles, 1 do
114133
profiles[i][2] = count_up_port
115134
end
116135
if size ~= 0 then
136+
if shp_enabled and shp_size == 0 then
137+
local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon'))
138+
local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff'))
139+
if xon ~= nil and xoff ~= nil and xon + xoff > size then
140+
accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2]
141+
end
142+
end
117143
accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2]
118144
end
119145
table.insert(statistics, {profiles[i][1], size, profiles[i][2]})
@@ -138,7 +164,7 @@ end
138164
local asic_keys = redis.call('KEYS', 'ASIC_TABLE*')
139165
local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size'))
140166

141-
-- Align mmu_size at cell size boundary, otherwith the sdk will complain and the syncd will faill
167+
-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail
142168
local number_of_cells = math.floor(mmu_size / cell_size)
143169
local ceiling_mmu_size = number_of_cells * cell_size
144170

@@ -149,11 +175,16 @@ redis.call('SELECT', config_db)
149175
local pools_need_update = {}
150176
local ipools = redis.call('KEYS', 'BUFFER_POOL|ingress*')
151177
local ingress_pool_count = 0
178+
local ingress_lossless_pool_size = nil
152179
for i = 1, #ipools, 1 do
153180
local size = tonumber(redis.call('HGET', ipools[i], 'size'))
154181
if not size then
155182
table.insert(pools_need_update, ipools[i])
156183
ingress_pool_count = ingress_pool_count + 1
184+
else
185+
if ipools[i] == 'BUFFER_POOL|ingress_lossless_pool' and shp_enabled and shp_size == 0 then
186+
ingress_lossless_pool_size = size
187+
end
157188
end
158189
end
159190

@@ -165,7 +196,14 @@ for i = 1, #epools, 1 do
165196
end
166197
end
167198

199+
if shp_enabled and shp_size == 0 then
200+
shp_size = math.ceil(accumulative_xoff / over_subscribe_ratio)
201+
end
202+
168203
local pool_size
204+
if shp_size then
205+
accumulative_occupied_buffer = accumulative_occupied_buffer + shp_size
206+
end
169207
if ingress_pool_count == 1 then
170208
pool_size = mmu_size - accumulative_occupied_buffer
171209
else
@@ -176,18 +214,31 @@ if pool_size > ceiling_mmu_size then
176214
pool_size = ceiling_mmu_size
177215
end
178216

217+
local shp_deployed = false
179218
for i = 1, #pools_need_update, 1 do
180219
local pool_name = string.match(pools_need_update[i], "BUFFER_POOL|([^%s]+)$")
181-
table.insert(result, pool_name .. ":" .. math.ceil(pool_size))
220+
if shp_size ~= 0 and pool_name == "ingress_lossless_pool" then
221+
table.insert(result, pool_name .. ":" .. math.ceil(pool_size) .. ":" .. math.ceil(shp_size))
222+
shp_deployed = true
223+
else
224+
table.insert(result, pool_name .. ":" .. math.ceil(pool_size))
225+
end
226+
end
227+
228+
if not shp_deployed and shp_size ~= 0 and ingress_lossless_pool_size ~= nil then
229+
table.insert(result, "ingress_lossless_pool:" .. math.ceil(ingress_lossless_pool_size) .. ":" .. math.ceil(shp_size))
182230
end
183231

184232
table.insert(result, "debug:mmu_size:" .. mmu_size)
185-
table.insert(result, "debug:accumulative:" .. accumulative_occupied_buffer)
233+
table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer)
186234
for i = 1, #statistics do
187235
table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3])
188236
end
189237
table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g)
190238
table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size)
191239
table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead)
240+
table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled))
241+
table.insert(result, "debug:shp_size:" .. shp_size)
242+
table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff)
192243

193244
return result

0 commit comments

Comments
 (0)