diff options
author | tilghman <tilghman@f38db490-d61c-443f-a65b-d21fe96a405b> | 2007-12-18 23:06:05 +0000 |
---|---|---|
committer | tilghman <tilghman@f38db490-d61c-443f-a65b-d21fe96a405b> | 2007-12-18 23:06:05 +0000 |
commit | 11a5516081314ebee69ff72fa76d3fbf554c0488 (patch) | |
tree | dc6fe6d88013e7f3321d1468c36650a90fc0a2b8 | |
parent | f3230e3b5b2f9d81b59764085ca39988843cd6dd (diff) |
Add a canary process for high priority mode (asterisk -p), to ensure that if
Asterisk goes into a busy loop, the machine will be recoverable. We'd still
need to do a restart to put Asterisk back into high priority mode, but at
least a reboot won't be required. (Closes issue #11559)
git-svn-id: http://svn.digium.com/svn/asterisk/trunk@93804 f38db490-d61c-443f-a65b-d21fe96a405b
-rw-r--r-- | main/asterisk.c | 73 | ||||
-rw-r--r-- | utils/Makefile | 2 | ||||
-rw-r--r-- | utils/astcanary.c | 84 |
3 files changed, 156 insertions, 3 deletions
diff --git a/main/asterisk.c b/main/asterisk.c
index d869d3cbf..1af51e840 100644
--- a/main/asterisk.c
+++ b/main/asterisk.c
@@ -233,6 +233,8 @@ static char *_argv[256];
 static int shuttingdown;
 static int restartnow;
 static pthread_t consolethread = AST_PTHREADT_NULL;
+static int canary_pid = 0;
+static char canary_filename[128];
 
 static char randompool[256];
 
@@ -2625,6 +2627,49 @@ static void *monitor_sig_flags(void *unused)
 	return NULL;
 }
 
+/*!
+ * \brief Watchdog thread for the astcanary helper process.
+ *
+ * After an initial grace period, checks once a minute that the
+ * modification time of canary_filename is no more than 60 seconds old.
+ * If it is older, or the file cannot be examined at all, a warning is
+ * logged, ast_set_priority(0) is called and the thread exits.
+ *
+ * \param unused Ignored.
+ * \return Does not return normally; the thread ends via pthread_exit().
+ */
+static void *canary_thread(void *unused)
+{
+	struct stat canary_stat;
+	struct timeval tv;
+
+	/* Give the canary time to sing */
+	sleep(120);
+
+	for (;;) {
+		/* A failed stat() leaves canary_stat unset, so check it before
+		 * reading st_mtime; a missing file means the canary is silent. */
+		int stat_failed = stat(canary_filename, &canary_stat);
+
+		tv = ast_tvnow();
+		if (stat_failed || tv.tv_sec > canary_stat.st_mtime + 60) {
+			ast_log(LOG_WARNING, "Canary is dead!!! Reducing priority\n");
+			ast_set_priority(0);
+			pthread_exit(NULL);
+		}
+
+		/* Check the canary once a minute */
+		sleep(60);
+	}
+}
+
+/* Used by libc's atexit(3) function */
+static void canary_exit(void)
+{
+	if (canary_pid > 0)
+		kill(canary_pid, SIGKILL);
+}
+
 int main(int argc, char *argv[])
 {
 	int c;
@@ -2808,10 +2853,49 @@ int main(int argc, char *argv[])
 	if ((!runuser) && !ast_strlen_zero(ast_config_AST_RUN_USER))
 		runuser = ast_config_AST_RUN_USER;
 
+	/* Must install this signal handler up here to ensure that if the canary
+	 * fails to execute that it doesn't kill the Asterisk process.
+	 */
+	signal(SIGCHLD, child_handler);
+
 #ifndef __CYGWIN__
 
-	if (isroot)
+	if (isroot) {
 		ast_set_priority(ast_opt_high_priority);
+		if (ast_opt_high_priority) {
+			snprintf(canary_filename, sizeof(canary_filename), "%s/alt.asterisk.canary.tweet.tweet.tweet", ast_config_AST_RUN_DIR);
+
+			canary_pid = fork();
+			if (canary_pid == 0) {
+				char canary_binary[128], *lastslash;
+				int fd;
+
+				/* Reset signal handler */
+				signal(SIGCHLD, SIG_DFL);
+
+				for (fd = 0; fd < 100; fd++)
+					close(fd);
+
+				execlp("astcanary", "astcanary", canary_filename, NULL);
+
+				/* If not found, try the same path as used to execute asterisk */
+				ast_copy_string(canary_binary, argv[0], sizeof(canary_binary));
+				if ((lastslash = strrchr(canary_binary, '/'))) {
+					ast_copy_string(lastslash + 1, "astcanary", sizeof(canary_binary) + canary_binary - (lastslash + 1));
+					execl(canary_binary, "astcanary", canary_filename, NULL);
+				}
+
+				/* Should never happen */
+				_exit(1);
+			} else if (canary_pid > 0) {
+				pthread_t dont_care;
+				ast_pthread_create_detached(&dont_care, NULL, canary_thread, NULL);
+			}
+
+			/* Kill the canary when we exit */
+			atexit(canary_exit);
+		}
+	}
 
 	if (isroot && rungroup) {
 		struct group *gr;
@@ -2975,7 +3059,6 @@ int main(int argc, char *argv[])
 	signal(SIGINT, __quit_handler);
 	signal(SIGTERM, __quit_handler);
 	signal(SIGHUP, hup_handler);
-	signal(SIGCHLD, child_handler);
 	signal(SIGPIPE, SIG_IGN);
 
 	/* ensure that the random number generators are seeded with a different value every time
diff --git a/utils/Makefile b/utils/Makefile
index 6cbd39818..4c6bb1114 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -17,7 +17,7 @@ ASTTOPDIR?=..
.PHONY: clean all uninstall
 
 # to get check_expr, add it to the ALL_UTILS list
-ALL_UTILS:=astman smsq stereorize streamplayer aelparse muted check_expr conf2ael hashtest2 hashtest
+ALL_UTILS:=astman smsq stereorize streamplayer aelparse muted check_expr conf2ael hashtest2 hashtest astcanary
 UTILS:=$(ALL_UTILS)
 
 LIBS += $(BKTR_LIB) # astobj2 with devmode uses backtrace
diff --git a/utils/astcanary.c b/utils/astcanary.c
new file mode 100644
index 000000000..eb9f17208
--- /dev/null
+++ b/utils/astcanary.c
@@ -0,0 +1,95 @@
+/*
+ * Asterisk -- An open source telephony toolkit.
+ *
+ * Copyright (C) 2007, Digium, Inc.
+ *
+ * Tilghman Lesher <tlesher AT digium DOT com>
+ *
+ * See http://www.asterisk.org for more information about
+ * the Asterisk project. Please do not directly contact
+ * any of the maintainers of this project for assistance;
+ * the project provides a web site, mailing lists and IRC
+ * channels for your use.
+ *
+ * This program is free software, distributed under the terms of
+ * the GNU General Public License Version 2. See the LICENSE file
+ * at the top of the source tree.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <utime.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/*!\brief
+ * At one time, canaries were carried along with coal miners down
+ * into a mine. Their purpose was to alert the miners when they
+ * had drilled into a pocket of methane gas or another noxious
+ * substance. The canary, being the most sensitive animal, would
+ * immediately fall over. Seeing this, the miners could take
+ * action to escape the mine, seeing an imminent danger.
+ *
+ * This process serves a similar purpose, though with the realtime
+ * priority being the reason. When a thread starts running away
+ * with the processor, it is typically difficult to tell what
+ * thread caused the problem, as the machine acts as if it is
+ * locked up (in fact, what has happened is that Asterisk runs at
+ * a higher priority than even the login shell, so the runaway
+ * thread hogs all available CPU time).
+ *
+ * If that happens, this canary process will cease to get any
+ * process time, which we can monitor with a realtime thread in
+ * Asterisk. Should that happen, that monitoring thread may take
+ * immediate action to slow down Asterisk to regular priority,
+ * thus allowing an administrator to log in to the system and
+ * restart Asterisk or perhaps take another course of action
+ * (such as retrieving a backtrace to let the developers know
+ * what precisely went wrong).
+ *
+ * Note that according to POSIX.1, all threads inside a single
+ * process must share the same priority, so when the monitoring
+ * thread deprioritizes itself, it deprioritizes all threads at
+ * the same time. This is also why this canary must exist as a
+ * completely separate process and not simply as a thread within
+ * Asterisk itself.
+ *
+ * \param argc Argument count; a file path must be supplied as argv[1].
+ * \param argv argv[1] names the file whose modification time is refreshed.
+ * \return Exits with status 1 if no path was given or the file cannot be
+ * (re)created; otherwise never returns.
+ */
+
+int main(int argc, char *argv[])
+{
+	int fd;
+
+	/* Without a filename there is nothing to touch */
+	if (argc < 2)
+		exit(1);
+
+	/* Run at normal priority */
+	setpriority(PRIO_PROCESS, 0, 0);
+	for (;;) {
+		/* Update the modification times (checked from Asterisk) */
+		if (utime(argv[1], NULL)) {
+			/* Recreate the file if it doesn't exist; O_CREAT requires an
+			 * explicit mode argument, otherwise the permissions are garbage */
+			if ((fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0644)) > -1)
+				close(fd);
+			else
+				exit(1);
+			continue;
+		}
+
+		/* Run occasionally */
+		sleep(5);
+	}
+
+	/* Never reached */
+	return 0;
+}
+
|