Skip to content

Commit bec6de0

Browse files
committed
Free YARN backend from shared filesystems
This is an attempt at fixing #193; it requires more testing.
1 parent 4d96eec commit bec6de0

File tree

1 file changed

+35
-3
lines changed

1 file changed

+35
-3
lines changed

src/lib/ketrew_yarn.ml

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ let distributed_shell_program
7676
let create
7777
?(host=Ketrew_host.tmp_on_localhost)
7878
?(daemonize_using=`Python_daemon)
79-
?(daemon_start_timeout=3600.)
79+
?(daemon_start_timeout=20.)
8080
program =
8181
let created = {host; program; daemonize_using; daemon_start_timeout} in
8282
`Long_running (name, `Created created |> serialize)
@@ -168,6 +168,20 @@ let get_application_id daemonize_run_param =
168168
>>= fun stderr ->
169169
find_application_id (stdout ^ stderr)
170170

171+
(** [parse_status str] extracts the final state of a YARN application from
    [str], the textual output of [yarn application -status <app-id>].

    [str] is split into lines; each line is split on every [':'] and each
    resulting field is stripped of surrounding whitespace. The first line
    whose first field is exactly ["Final-State"] is selected, and only its
    second field is inspected:
    - ["SUCCEEDED"] gives [`Succeeded];
    - ["FAILED"] or ["KILLED"] gives [`Failed];
    - any other value, a missing second field, or no ["Final-State"] line
      at all gives [`Unknown]. *)
let parse_status str =
172+
let lines = String.split ~on:(`Character '\n') str in
173+
let key_values =
174+
List.map lines ~f:(fun line ->
175+
String.split ~on:(`Character ':') line
176+
|> List.map ~f:String.strip)
177+
in
178+
match
179+
List.find key_values ~f:(function "Final-State" :: _ -> true | _ -> false)
180+
with
181+
| Some (_ :: "SUCCEEDED" :: _) -> `Succeeded
182+
| Some (_ :: "FAILED" :: _)
183+
| Some (_ :: "KILLED" :: _) -> `Failed
184+
| Some _ | None -> `Unknown
171185

172186
let query run_param item =
173187
match run_param with
@@ -231,7 +245,7 @@ let start = function
231245
Ketrew_daemonize.create
232246
~starting_timeout:daemon_start_timeout
233247
~host actual_program ~using:daemonize_using
234-
?call_script in
248+
?call_script ~no_log_is_ok:true in
235249
Ketrew_daemonize.(start (deserialize_exn daemonize_run_param))
236250
>>= fun daemonized_script ->
237251
return (`Running {created; daemonized_script})
@@ -251,7 +265,25 @@ let update run_parameters =
251265
return (`Failed (new_rp, s))
252266
| `Succeeded rp ->
253267
make_new_rp rp >>= fun new_rp ->
254-
return (`Succeeded new_rp)
268+
(* Since we use `~no_log_is_ok:true` it is pretty easy for a
269+
daemonized process to succeed while the yarn application
270+
failed, hence we need to get the status from yarn. *)
271+
begin
272+
begin
273+
let host = run.created.host in
274+
get_application_id run.daemonized_script
275+
>>= fun app_id ->
276+
shell_command_output_or_log ~host (fmt "yarn application -status %s" app_id)
277+
>>= fun application_status_string ->
278+
begin match parse_status application_status_string with
279+
| `Succeeded -> return (`Succeeded new_rp)
280+
| `Failed -> return (`Failed (new_rp, "Yarn-status: FAILED"))
281+
| `Unknown -> return (`Still_running new_rp)
282+
end
283+
end >>< function
284+
| `Ok o -> return o
285+
| `Error log -> fail (`Fatal (Log.to_long_string log))
286+
end
255287
| `Still_running rp ->
256288
make_new_rp rp >>= fun new_rp ->
257289
return (`Still_running new_rp)

0 commit comments

Comments
 (0)