📄 watchdog.c
字号:
int main(int argc, char *const argv[]){ FILE *fp; int c, force = FALSE, sync_it = FALSE; int hold; char *filename = CONFIG_FILENAME; struct list *act; pid_t child_pid;#if USE_SYSLOG char *opts = "d:i:n:fsvbql:p:t:c:r:m:a:"; struct option long_options[] = { {"config-file", required_argument, NULL, 'c'}, {"force", no_argument, NULL, 'f'}, {"sync", no_argument, NULL, 's'}, {"no-action", no_argument, NULL, 'q'}, {"verbose", no_argument, NULL, 'v'}, {"softboot", no_argument, NULL, 'b'}, {NULL, 0, NULL, 0} }; long count = 0L;#else /* USE_SYSLOG */ char *opts = "d:i:n:fsbql:p:t:c:r:m:a:"; struct option long_options[] = { {"config-file", required_argument, NULL, 'c'}, {"force", no_argument, NULL, 'f'}, {"sync", no_argument, NULL, 's'}, {"no-action", no_argument, NULL, 'q'}, {"softboot", no_argument, NULL, 'b'}, {NULL, 0, NULL, 0} };#endif /* USE_SYSLOG */ progname = basename(argv[0]); /* check the options */ /* there aren't that many any more */ while ((c = getopt_long(argc, argv, opts, long_options, NULL)) != EOF) { if (c == -1) break; switch (c) { case 'n': case 'p': case 'a': case 'r': case 'd': case 't': case 'l': case 'm': case 'i': old_option(c, filename); break; case 'c': filename = optarg; break; case 'f': force = TRUE; break; case 's': sync_it = TRUE; break; case 'b': softboot = TRUE; break; case 'q': no_act = TRUE; break;#if USE_SYSLOG case 'v': verbose = TRUE; break;#endif /* USE_SYSLOG */ default: usage(); } } read_config(filename, progname); if (tint < 0) usage(); if (tint >= TIMER_MARGIN && !force) { fprintf(stderr, "%s error:\n", progname); fprintf(stderr, "This interval length might reboot the system while the process sleeps!\n"); fprintf(stderr, "To force this interval length use the -f option.\n"); exit(1); } if (maxload1 > 0 && maxload1 < MINLOAD && !force) { fprintf(stderr, "%s error:\n", progname); fprintf(stderr, "Using this maximal load average might reboot the system to often!\n"); fprintf(stderr, "To force this load average use the -f option.\n"); exit(1); } /* make sure we get our own directory in /var/log */ if (mkdir ("/var/log/watchdog", 0750) && errno != EEXIST) { fprintf(stderr, "%s error:\n", progname); fprintf(stderr, "Cannot create directory /var/log/watchdog\n"); exit (1); } /* set up pinging if in ping mode */ if (target != NULL) { for (act = target; act != NULL; act = act->next) { struct protoent *proto; struct pingmode *net = (struct pingmode *) calloc(1, sizeof(struct pingmode)); if (net == NULL) { fprintf(stderr, "%s: out of memory\n", progname); exit(1); } /* setup the socket */ memset(&(net->to), 0, sizeof(struct sockaddr)); ((struct sockaddr_in *) &(net->to))->sin_family = AF_INET; if ((((struct sockaddr_in *) &(net->to))->sin_addr.s_addr = inet_addr(act->name)) == (unsigned int) -1) { (void) fprintf(stderr, "%s: unknown host %s\n", progname, act->name); exit(1); } if (!(net->packet = (unsigned char *) malloc((unsigned int) (DATALEN + MAXIPLEN + MAXICMPLEN)))) { fprintf(stderr, "%s: out of memory\n", progname); exit(1); } if (!(proto = getprotobyname("icmp"))) { (void) fprintf(stderr, "%s: unknown protocol icmp.\n", progname); exit(1); } if ((net->sock_fp = socket(AF_INET, SOCK_RAW, proto->p_proto)) < 0) { perror(progname); exit(1); } /* this is necessary for broadcast pings to work */ (void) setsockopt(net->sock_fp, SOL_SOCKET, SO_BROADCAST, (char *)&hold, sizeof(hold)); hold = 48 * 1024; (void) setsockopt(net->sock_fp, SOL_SOCKET, SO_RCVBUF, (char *) &hold, sizeof(hold)); act->parameter.net = *net; } } /* make sure we're on the root partition */ if (chdir("/") < 0) { perror(progname); exit(1); }#if !defined(DEBUG) /* fork to go into the background */ if ((child_pid = fork()) < 0) { perror(progname); exit(1); } else if (child_pid > 0) { /* fork was okay */ /* wait for child to exit */ if (waitpid(child_pid, NULL, 0) != child_pid) { perror(progname); exit(1); } /* and exit myself */ exit(0); } /* and fork again to make sure we inherit all rights from init */ if ((child_pid = fork()) < 0) { perror(progname); exit(1); } else if (child_pid > 0) exit(0);#endif /* !DEBUG */ /* now we're free */#if USE_SYSLOG#if !defined(DEBUG) /* Okay, we're a daemon */ /* but we're still attached to the tty */ /* create our own session */ setsid(); /* with USE_SYSLOG we don't do any console IO */ close(0); close(1); close(2);#endif /* !DEBUG */ /* Log the starting message */ openlog(progname, LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "starting daemon (%d.%d):", MAJOR_VERSION, MINOR_VERSION); syslog(LOG_INFO, "int=%ds realtime=%s sync=%s soft=%s mla=%d mem=%ld", tint, realtime ? "yes" : "no", sync_it ? "yes" : "no", softboot ? "yes" : "no", maxload1, minpages); if (target == NULL) syslog(LOG_INFO, "ping: no machine to check"); else for (act = target; act != NULL; act = act->next) syslog(LOG_INFO, "ping: %s", act->name); if (file == NULL) syslog(LOG_INFO, "file: no file to check"); else for (act = file; act != NULL; act = act->next) syslog(LOG_INFO, "file: %s:%d", act->name, act->parameter.file.mtime); if (pidfile == NULL) syslog(LOG_INFO, "pidfile: no server process to check"); else for (act = pidfile; act != NULL; act = act->next) syslog(LOG_INFO, "pidfile: %s", act->name); if (iface == NULL) syslog(LOG_INFO, "interface: no interface to check"); else for (act = iface; act != NULL; act = act->next) syslog(LOG_INFO, "interface: %s", act->name); syslog(LOG_INFO, "test=%s(%d) repair=%s alive=%s heartbeat=%s temp=%s to=%s no_act=%s", (tbinary == NULL) ? "none" : tbinary, timeout, (rbinary == NULL) ? "none" : rbinary, (devname == NULL) ? "none" : devname, (heartbeat == NULL) ? "none" : heartbeat, (tempname == NULL) ? "none" : tempname, (admin == NULL) ? "noone" : admin, (no_act == TRUE) ? "yes" : "no");#endif /* USE_SYSLOG */ /* open the device */ if (devname != NULL && no_act == FALSE) { watchdog = open(devname, O_WRONLY); if (watchdog == -1) {#if USE_SYSLOG syslog(LOG_ERR, "cannot open %s (errno = %d = '%m')", devname, errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ /* do not exit here per default */ /* we can use watchdog even if there is no watchdog device */ } } /* MJ 16/2/2000, need to keep track of the watchdog writes so that I can have a potted history of recent reboots */ if ( heartbeat != NULL ) { hb = ((hb = fopen(heartbeat, "r+")) == NULL) ? fopen(heartbeat, "w+") : hb; if ( hb == NULL ) {#if USE_SYSLOG syslog(LOG_ERR, "cannot open %s (errno = %d = '%m')", heartbeat, errno);#else perror(progname);#endif } else { char rbuf[TS_SIZE + 1]; /* Allocate memory for keeping the timestamps in */ nrts = 0; lastts = 0; timestamps = (unsigned char *) calloc(hbstamps, TS_SIZE); if ( timestamps == NULL ) {#if USE_SYSLOG syslog(LOG_ERR, "cannot allocate memory for timestamps (errno = %d = '%m')", errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ } else { /* read any previous timestamps */ rewind(hb); while ( fgets(rbuf, TS_SIZE + 1, hb) != NULL ) { memcpy(timestamps + (TS_SIZE * lastts), rbuf, TS_SIZE); if (nrts < hbstamps) nrts++; lastts = ++lastts % hbstamps; } /* Write an indication that the watchdog has started to the heartbeat file */ /* copy it to the buffer */ sprintf(rbuf, "%*s\n", TS_SIZE - 1, "--restart--"); memcpy(timestamps + (lastts * TS_SIZE), rbuf, TS_SIZE); // success if (nrts < hbstamps) nrts++; lastts = ++lastts % hbstamps; } } } if (maxload1 > 0) { /* open the load average file */ load = open("/proc/loadavg", O_RDONLY); if (load == -1) {#if USE_SYSLOG syslog(LOG_ERR, "cannot open /proc/loadavg (errno = %d = '%m')", errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ } } if (minpages > 0) { /* open the memory info file */ mem = open("/proc/meminfo", O_RDONLY); if (mem == -1) {#if USE_SYSLOG syslog(LOG_ERR, "cannot open /proc/meminfo (errno = %d = '%m')", errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ } } if (tempname != NULL && no_act == FALSE) { /* open the temperature file */ temp = open(tempname, O_RDONLY); if (temp == -1) {#if USE_SYSLOG syslog(LOG_ERR, "cannot open %s (errno = %d = '%m')", tempname, errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ } } /* tuck my process id away */ fp = fopen(PIDFILE, "w"); if (fp != NULL) { fprintf(fp, "%d\n", pid = getpid()); (void) fclose(fp); } /* set signal term to call terminate() */ /* to make sure watchdog device is closed */ signal(SIGTERM, terminate);#if defined(_POSIX_MEMLOCK) if (realtime == TRUE) { /* lock all actual and future pages into memory */ if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {#if USE_SYSLOG syslog(LOG_ERR, "cannot lock realtime memory (errno = %d = '%m')", errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ } else { struct sched_param sp; /* now set the scheduler */ sp.sched_priority = schedprio; if (sched_setscheduler(0, SCHED_RR, &sp) != 0) {#if USE_SYSLOG syslog(LOG_ERR, "cannot set scheduler (errno = %d = '%m')", errno);#else /* USE_SYSLOG */ perror(progname);#endif /* USE_SYSLOG */ } else mlocked = TRUE; } }#endif /* main loop: update after <tint> seconds */ while (1) { wd_action(keep_alive(), rbinary); /* sync system if we have to */ do_check(sync_system(sync_it), rbinary); /* check file table */ do_check(check_file_table(), rbinary); /* check load average */ do_check(check_load(), rbinary); /* check free memory */ do_check(check_memory(), rbinary); /* check temperature */ do_check(check_temp(), rbinary); /* in filemode stat file */ for (act = file; act != NULL; act = act->next) do_check(check_file_stat(act), rbinary); /* in pidmode kill -0 processes */ for (act = pidfile; act != NULL; act = act->next) do_check(check_pidfile(act), rbinary); /* in network mode check the given devices for input */ for (act = iface; act != NULL; act = act->next) do_check(check_iface(act), rbinary); /* in ping mode ping the ip address */ for (act = target; act != NULL; act = act->next) do_check(check_net(act->name, act->parameter.net.sock_fp, act->parameter.net.to, act->parameter.net.packet, tint , pingcount), rbinary); /* in user mode execute the given binary or just test fork() call */ do_check(check_bin(tbinary, timeout), rbinary); /* finally sleep some seconds */ sleep((tint >> 1) + (tint % 2)); /* this should make watchdog sleep tint seconds alltogther */ /* sleep(tint); */#if USE_SYSLOG /* do verbose logging */ if (verbose && logtick && (--ticker == 0)) { ticker = logtick; count += logtick; syslog(LOG_INFO, "still alive after %ld interval(s)", count); }#endif /* USE_SYSLOG */ }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -