[how]
MQ publishes the crash event, and lifecycle handles it as a job event.
When job_state is "crash" and the job has been consumed, close the job
and reboot the crashed machine; if there is no job, just reboot the crashed machine.
Signed-off-by: Li Ping <1477412247(a)qq.com>
---
src/lib/lifecycle.cr | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/src/lib/lifecycle.cr b/src/lib/lifecycle.cr
index 4bd2ac0..8f34903 100644
--- a/src/lib/lifecycle.cr
+++ b/src/lib/lifecycle.cr
@@ -122,6 +122,8 @@ class Lifecycle
on_job_close(event)
when "abnormal"
on_abnormal_job(event)
+ when "crash"
+ on_job_crash(event)
else
on_other_job(event)
end
@@ -190,6 +192,17 @@ class Lifecycle
update_cached_machine(job["testbox"].to_s, event)
end
+ def on_job_crash(event) # handles a "crash" event: closes its tracked job (if any), then reboots the testbox
+ event_job_id = event["job_id"]?.to_s # nil-safe like on_job_boot: a missing "job_id" yields "" instead of raising
+ if @jobs[event_job_id]?
+ @jobs.delete(event_job_id)
+ spawn @scheduler_api.close_job(event_job_id, "crash", "lifecycle") # close runs in a spawned fiber
+ end
+
+ testbox = event["testbox"].to_s
+ reboot_crash_machine(testbox, event) # reached whether or not a job was found in @jobs
+ end
+
def on_job_boot(event)
event_job_id = event["job_id"]?.to_s
@jobs[event_job_id] = event unless event_job_id.empty?
@@ -298,6 +311,22 @@ class Lifecycle
spawn @scheduler_api.close_job(job_id, reason, "lifecycle")
end
+ def reboot_crash_machine(testbox, event) # publishes a reboot request for testbox and updates its ES record
+ @machines.delete(testbox) # remove this testbox from @machines before consulting ES
+ machine = @es.get_tbox(testbox)
+ return unless machine # nothing returned by ES for this testbox
+ return unless event["time"].to_s.bigger_than?(machine["time"]?) # NOTE(review): presumably skips events not newer than the stored record; confirm bigger_than? semantics
+
+ mq_queue = get_machine_reboot_queue(testbox)
+ machine.as_h.delete("history") # "history" is dropped from both the published message and the later ES update
+ machine.as_h["testbox"] = JSON::Any.new(testbox)
+ @mq.publish_confirm(mq_queue, machine.to_json, durable: true)
+
+ machine["state"] = "rebooting_queue" # set after to_json above, so the published message keeps the previous state/time
+ machine["time"] = Time.local.to_s("%Y-%m-%dT%H:%M:%S+0800") # NOTE(review): local time formatted with a hard-coded +0800 suffix
+ @es.update_tbox(testbox, machine.as_h)
+ end
+
def reboot_timeout_machine(testbox)
@machines.delete(testbox)
machine = @es.get_tbox(testbox)
--
2.23.0