Skip to content

Commit e274d86

Browse files
committed
Fix some edge-cases in rapid reloads causing config to go missing.
This is likely only an issue in our test suite, but if sending rapid reload commands to the api-umbrella process, the TrafficServer config could go missing for a short period of time, which could lead to requests not being routed. This fixes it by only changing the config files if they actually differ (to prevent unnecessary reloads by TrafficServer), and then also ensuring that all config files are fully written to disk before being moved into place (so processes don't ever pick up on partially written config files).
1 parent 56a2367 commit e274d86

File tree

2 files changed

+50
-13
lines changed

2 files changed

+50
-13
lines changed

src/api-umbrella/cli/setup.lua

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ local run_command = require "api-umbrella.utils.run_command"
1010
local stat = require "posix.sys.stat"
1111
local tablex = require "pl.tablex"
1212
local unistd = require "posix.unistd"
13+
local xpcall_error_handler = require "api-umbrella.utils.xpcall_error_handler"
1314

1415
local chmod = stat.chmod
1516
local chown = unistd.chown
@@ -142,6 +143,18 @@ local function ensure_geoip_db()
142143
end
143144
end
144145

146+
local function set_template_permissions(file_path, install_filename)
147+
if config["group"] then
148+
chown(file_path, nil, config["group"])
149+
end
150+
151+
if install_filename == "rc.log" or install_filename == "rc.main" or install_filename == "rc.perp" then
152+
chmod(file_path, tonumber("0750", 8))
153+
else
154+
chmod(file_path, tonumber("0640", 8))
155+
end
156+
end
157+
145158
local function write_templates()
146159
local template_root = path.join(config["_src_root_dir"], "templates/etc")
147160
for root, _, files in dir.walk(template_root) do
@@ -176,17 +189,27 @@ local function write_templates()
176189
content = lustache:render(mustache_unescape(content), config)
177190
end
178191

179-
dir.makepath(path.dirname(install_path))
180-
file.write(install_path, content)
181-
if config["group"] then
182-
chown(install_path, nil, config["group"])
183-
end
184-
185192
local install_filename = path.basename(install_path)
186-
if install_filename == "rc.log" or install_filename == "rc.main" or install_filename == "rc.perp" then
187-
chmod(install_path, tonumber("0750", 8))
193+
194+
-- Only write the file if it differs from the existing file. This helps
195+
-- prevents some processes, like Trafficserver, from thinking there are
196+
-- config file updates to process on reloads if the file timestamps
197+
-- change (even if there aren't actually any changes).
198+
local _, existing_content = xpcall(file.read, xpcall_error_handler, install_path, true)
199+
if content ~= existing_content then
200+
-- Write the config file in an atomic fashion (by writing to a temp
201+
-- file and then moving into place), so that during reloads the
202+
-- processes never read a half-written file.
203+
local install_dir = path.dirname(install_path)
204+
local temp_path = path.tmpname()
205+
file.write(temp_path, "")
206+
set_template_permissions(temp_path, install_filename)
207+
file.write(temp_path, content)
208+
209+
dir.makepath(install_dir)
210+
file.move(temp_path, install_path)
188211
else
189-
chmod(install_path, tonumber("0640", 8))
212+
set_template_permissions(install_path, install_filename)
190213
end
191214
end
192215
end

test/support/api_umbrella_test_helpers/process.rb

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -242,11 +242,25 @@ def perp_signal(service_name, signal_name)
242242
end
243243

244244
def perp_pid(service_name)
245-
output, _status = run_shell("perpstat", "-b", File.join($config["root_dir"], "etc/perp"), service_name)
246-
matches = output.match(/^\s*main: up .*\(pid (\d+)\)\s*$/)
247245
pid = nil
248-
if matches
249-
pid = matches[1]
246+
begin
247+
# If api-umbrella is actively being reloaded, then the "perphup" signal
248+
# sent to perp may temporarily result in perpstat not thinking services
249+
# are activated until the reload has finished (so no pids will be
250+
# returned). So retry fetching the pids for a while to account for this
251+
# timing edge-case while reloads are being tested.
252+
Timeout.timeout(5) do
253+
loop do
254+
output, _status = run_shell("perpstat", "-b", File.join($config["root_dir"], "etc/perp"), service_name)
255+
matches = output.match(/^\s*main: up .*\(pid (\d+)\)\s*$/)
256+
if matches
257+
pid = matches[1]
258+
break
259+
end
260+
end
261+
end
262+
rescue Timeout::Error # rubocop:disable Lint/HandleExceptions
263+
# Ignore and return nil pid.
250264
end
251265

252266
pid

0 commit comments

Comments
 (0)