summary refs log tree commit diff stats
diff options
context:
space:
mode:
author John Crispin <blogic@openwrt.org> 2013-08-27 11:41:11 +0200
committer John Crispin <blogic@openwrt.org> 2013-09-03 19:42:44 +0200
commit f0b6ea93233ba5134311352595969797b00a98de (patch)
tree 248321a9b1d9a3c9cdf511ff413aa446967fb3f2
parent 1c01a5137402cfd1bc7557118401f3b90c305b78 (diff)
download unitd-f0b6ea93233ba5134311352595969797b00a98de.tar
unitd-f0b6ea93233ba5134311352595969797b00a98de.zip
add respawn handling
Signed-off-by: John Crispin <blogic@openwrt.org>
-rw-r--r-- instance.c 52
-rw-r--r-- instance.h 13
-rw-r--r-- service.c 2
3 files changed, 57 insertions, 10 deletions
diff --git a/instance.c b/instance.c
index d61bb33..e263b84 100644
--- a/instance.c
+++ b/instance.c
@@ -32,6 +32,7 @@ enum {
INSTANCE_ATTR_NETDEV,
INSTANCE_ATTR_FILE,
INSTANCE_ATTR_TRIGGER,
+ INSTANCE_ATTR_RESPAWN,
INSTANCE_ATTR_NICE,
__INSTANCE_ATTR_MAX
};
@@ -43,6 +44,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
[INSTANCE_ATTR_NETDEV] = { "netdev", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_FILE] = { "file", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_TRIGGER] = { "triggers", BLOBMSG_TYPE_ARRAY },
+ [INSTANCE_ATTR_RESPAWN] = { "respawn", BLOBMSG_TYPE_ARRAY },
[INSTANCE_ATTR_NICE] = { "nice", BLOBMSG_TYPE_INT32 },
};
@@ -102,6 +104,8 @@ instance_start(struct service_instance *in)
return;
in->restart = false;
+ in->halt = !in->respawn;
+
if (!in->valid)
return;
@@ -117,6 +121,7 @@ instance_start(struct service_instance *in)
DEBUG(1, "Started instance %s::%s\n", in->srv->name, in->name);
in->proc.pid = pid;
+ clock_gettime(CLOCK_MONOTONIC, &in->start);
uloop_process_add(&in->proc);
}
@@ -126,29 +131,58 @@ instance_timeout(struct uloop_timeout *t)
struct service_instance *in;
in = container_of(t, struct service_instance, timeout);
- kill(in->proc.pid, SIGKILL);
- uloop_process_delete(&in->proc);
- in->proc.cb(&in->proc, -1);
+
+ if (!in->halt && (in->restart || in->respawn))
+ instance_start(in);
}
static void
instance_exit(struct uloop_process *p, int ret)
{
struct service_instance *in;
+ struct timespec tp;
+ long runtime;
in = container_of(p, struct service_instance, proc);
- DEBUG(1, "Instance %s::%s exit with error code %d\n", in->srv->name, in->name, ret);
+
+ clock_gettime(CLOCK_MONOTONIC, &tp);
+ runtime = tp.tv_sec - in->start.tv_sec;
+
+ DEBUG(1, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime);
uloop_timeout_cancel(&in->timeout);
- if (in->restart)
+ if (in->halt) {
+ /* no action */
+ } else if (in->restart) {
instance_start(in);
+ } else if (in->respawn) {
+ if (runtime < RESPAWN_ERROR)
+ in->respawn_count++;
+ else
+ in->respawn_count = 0;
+ if (in->respawn_count > 5)
+ DEBUG(1, "Instance %s::%s s in a crash loop %d crashes, %ld seconds since last crash\n",
+ in->srv->name, in->name, in->respawn_count, runtime);
+ uloop_timeout_set(&in->timeout, 5000);
+ }
}
void
-instance_stop(struct service_instance *in, bool restart)
+instance_stop(struct service_instance *in)
{
if (!in->proc.pending)
return;
+ in->halt = true;
+ in->restart = in->respawn = false;
+ kill(in->proc.pid, SIGTERM);
+}
+static void
+instance_restart(struct service_instance *in)
+{
+ if (!in->proc.pending)
+ return;
+ in->halt = false;
+ in->restart = true;
kill(in->proc.pid, SIGTERM);
}
@@ -348,9 +382,9 @@ instance_update(struct service_instance *in, struct service_instance *in_new)
instance_config_move(in, in_new);
instance_start(in);
} else {
- in->restart = true;
- instance_stop(in, true);
+ instance_restart(in);
instance_config_move(in, in_new);
+ /* restart happens in the child callback handler */
}
return true;
}
@@ -375,6 +409,8 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr *
in->config = config;
in->timeout.cb = instance_timeout;
in->proc.cb = instance_exit;
+ in->respawn = true;
+ in->respawn_count = 0;
blobmsg_list_init(&in->netdev, struct instance_netdev, node, instance_netdev_cmp);
blobmsg_list_init(&in->file, struct instance_file, node, instance_file_cmp);
diff --git a/instance.h b/instance.h
index ceae834..1c8c0a0 100644
--- a/instance.h
+++ b/instance.h
@@ -19,6 +19,8 @@
#include <libubox/uloop.h>
#include "utils.h"
+#define RESPAWN_ERROR (5 * 60)
+
struct service_instance {
struct vlist_node node;
struct service *srv;
@@ -26,7 +28,16 @@ struct service_instance {
int8_t nice;
bool valid;
+
+ bool halt;
bool restart;
+ bool respawn;
+ int respawn_count;
+ struct timespec start;
+
+ int respawn_timeout;
+ int respawn_threshold;
+ int respawn_retry;
struct blob_attr *config;
struct uloop_process proc;
@@ -41,7 +52,7 @@ struct service_instance {
};
void instance_start(struct service_instance *in);
-void instance_stop(struct service_instance *in, bool restart);
+void instance_stop(struct service_instance *in);
bool instance_update(struct service_instance *in, struct service_instance *in_new);
void instance_init(struct service_instance *in, struct service *s, struct blob_attr *config);
void instance_free(struct service_instance *in);
diff --git a/service.c b/service.c
index 561c76c..c80402c 100644
--- a/service.c
+++ b/service.c
@@ -55,7 +55,7 @@ service_instance_update(struct vlist_tree *tree, struct vlist_node *node_new,
instance_free(in_n);
} else if (in_o) {
DEBUG(1, "Free instance %s::%s\n", in_o->srv->name, in_o->name);
- instance_stop(in_o, false);
+ instance_stop(in_o);
instance_free(in_o);
} else if (in_n) {
DEBUG(1, "Create instance %s::%s\n", in_n->srv->name, in_n->name);