@@ -76,7 +76,7 @@ let distributed_shell_program
7676let create
7777 ?(host =Ketrew_host. tmp_on_localhost)
7878 ?(daemonize_using =`Python_daemon )
79- ?(daemon_start_timeout =3600 . )
79+ ?(daemon_start_timeout =20 . )
8080 program =
8181 let created = {host; program; daemonize_using; daemon_start_timeout} in
8282 `Long_running (name, `Created created |> serialize)
@@ -168,6 +168,20 @@ let get_application_id daemonize_run_param =
168168 >> = fun stderr ->
169169 find_application_id (stdout ^ stderr)
170170
(** [parse_status str] extracts the final state from a multi-line
    ["Key : Value"] status report (the caller feeds it the output of
    [yarn application -status <id>]).

    Each line is split on [':'] and every resulting field is passed through
    [String.strip]. The first line whose first field is exactly
    ["Final-State"] decides the result:
    - [`Succeeded] when its second field is ["SUCCEEDED"];
    - [`Failed] when its second field is ["FAILED"] or ["KILLED"];
    - [`Unknown] for any other value, or when no such line exists. *)
let parse_status str =
  let lines = String.split ~on:(`Character '\n') str in
  let key_values =
    List.map lines ~f:(fun line ->
        String.split ~on:(`Character ':') line
        |> List.map ~f:String.strip)
  in
  match
    List.find key_values ~f:(function "Final-State" :: _ -> true | _ -> false)
  with
  | Some (_ :: "SUCCEEDED" :: _) -> `Succeeded
  (* Fields are already stripped, so the literals must not carry padding:
     a [" FAILED"] pattern (leading space) could never match and a failed
     application would be classified as [`Unknown]. *)
  | Some (_ :: "FAILED" :: _)
  | Some (_ :: "KILLED" :: _) -> `Failed
  | Some _ | None -> `Unknown
171185
172186let query run_param item =
173187 match run_param with
@@ -231,7 +245,7 @@ let start = function
231245 Ketrew_daemonize. create
232246 ~starting_timeout: daemon_start_timeout
233247 ~host actual_program ~using: daemonize_using
234- ?call_script in
248+ ?call_script ~no_log_is_ok: true in
235249 Ketrew_daemonize. (start (deserialize_exn daemonize_run_param))
236250 >> = fun daemonized_script ->
237251 return (`Running {created; daemonized_script})
@@ -251,7 +265,25 @@ let update run_parameters =
251265 return (`Failed (new_rp, s))
252266 | `Succeeded rp ->
253267 make_new_rp rp >> = fun new_rp ->
254- return (`Succeeded new_rp)
268+ (* Since we use `~no_log_is_ok:true` it is pretty easy for a
269+ daemonized process to succeed while the yarn application
270+ failed, hence we need to get the status from yarn. *)
271+ begin
272+ begin
273+ let host = run.created.host in
274+ get_application_id run.daemonized_script
275+ >> = fun app_id ->
276+ shell_command_output_or_log ~host (fmt " yarn application -status %s" app_id)
277+ >> = fun application_status_string ->
278+ begin match parse_status application_status_string with
279+ | `Succeeded -> return (`Succeeded new_rp)
280+ | `Failed -> return (`Failed (new_rp, " Yarn-status: FAILED" ))
281+ | `Unknown -> return (`Still_running new_rp)
282+ end
283+ end >>< function
284+ | `Ok o -> return o
285+ | `Error log -> fail (`Fatal (Log. to_long_string log))
286+ end
255287 | `Still_running rp ->
256288 make_new_rp rp >> = fun new_rp ->
257289 return (`Still_running new_rp)
0 commit comments