[PATCH compass-ci] src/lib: add on_job_crash

29 Mar 2021

[how]
The crash event is published through MQ, and lifecycle handles it.
When job_state is "crash" and the job has already been consumed, close the job
and reboot the crashed machine; if there is no such job, just reboot the crashed machine.

Signed-off-by: Li Ping <1477412247@qq.com>
---
  src/lib/lifecycle.cr | 29 +++++++++++++++++++++++++++++
  1 file changed, 29 insertions(+)

diff --git a/src/lib/lifecycle.cr b/src/lib/lifecycle.cr
index 4bd2ac0..8f34903 100644
--- a/src/lib/lifecycle.cr
+++ b/src/lib/lifecycle.cr
@@ -122,6 +122,8 @@ class Lifecycle
          on_job_close(event)
        when "abnormal"
          on_abnormal_job(event)
+      when "crash"
+        on_job_crash(event)
        else
          on_other_job(event)
        end
@@ -190,6 +192,17 @@ class Lifecycle
      update_cached_machine(job["testbox"].to_s, event)
    end
  
+  def on_job_crash(event) # handle a "crash" event: close the tracked job (if any), then reboot its testbox
+    event_job_id = event["job_id"]?.to_s # nil-safe lookup (as in on_job_boot): an event without job_id must still reach the reboot
+    if @jobs[event_job_id]?
+      @jobs.delete(event_job_id)
+      spawn @scheduler_api.close_job(event_job_id, "crash", "lifecycle")
+    end
+
+    testbox = event["testbox"].to_s
+    reboot_crash_machine(testbox, event) # runs whether or not a job was tracked for this event
+  end
+
    def on_job_boot(event)
      event_job_id = event["job_id"]?.to_s
      @jobs[event_job_id] = event unless event_job_id.empty?
@@ -298,6 +311,22 @@ class Lifecycle
      spawn @scheduler_api.close_job(job_id, reason, "lifecycle")
    end
  
+  def reboot_crash_machine(testbox, event) # publish a reboot request for the crashed testbox and record the new state in ES
+    @machines.delete(testbox) # presumably drops the cached machine entry -- confirm what @machines holds
+    machine = @es.get_tbox(testbox)
+    return unless machine # nothing to do when get_tbox returns no record for this testbox
+    return unless event["time"].to_s.bigger_than?(machine["time"]?) # presumably skips events not newer than the stored record -- confirm bigger_than? semantics
+
+    mq_queue = get_machine_reboot_queue(testbox)
+    machine.as_h.delete("history") # removed before serializing, so the published message carries no "history"
+    machine.as_h["testbox"] = JSON::Any.new(testbox)
+    @mq.publish_confirm(mq_queue, machine.to_json, durable: true)
+
+    machine["state"] = "rebooting_queue" # set after publishing, so only the ES update below sees the new state/time
+    machine["time"] = Time.local.to_s("%Y-%m-%dT%H:%M:%S+0800") # NOTE(review): offset is a literal in the format string; assumes the local zone is +08:00
+    @es.update_tbox(testbox, machine.as_h)
+  end
+
    def reboot_timeout_machine(testbox)
      @machines.delete(testbox)
      machine = @es.get_tbox(testbox)
-- 
2.23.0

    

Li Ping

tags

participants (1)