#define _GNU_SOURCE

/*
 * NOTE(review): the header names on the #include lines were missing from
 * the copy of this file that was reviewed.  The list below is the set of
 * headers required by the code in this file; confirm it against the
 * upstream source, which carried two more #include lines than are listed
 * here.
 */
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>

#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/sysinfo.h>
#include <sys/types.h>

#include <linux/watchdog.h>

/* This program does everything in seconds granularity.
 * The time to accomplish a task has the number of nanoseconds
 * added in to specify the time for the event to be accomplished.
 */

/* Program name used as the syslog ident; PACKAGE is supplied by the build. */
char *name = PACKAGE;

/*
 * Log the version banner and the command-line/environment help text to
 * syslog at LOG_ERR, then terminate the process with exit status 1.
 * This function does not return.
 */
void usage(void)
{
    syslog(LOG_ERR, "%s (" PACKAGE ") " VERSION " (" __DATE__ " " __TIME__ ")\n", name);
    syslog(LOG_ERR, "Copyright (C) 2021 by Multi-Tech Systems\n");
    syslog(LOG_ERR,"USAGE: feedwatchdog device timeout");
    syslog(LOG_ERR,"Times below are in seconds");
    syslog(LOG_ERR,"ENVIRONMENT:");
    syslog(LOG_ERR," PIDFILE");
    syslog(LOG_ERR," FEED How often to feed watchdog in seconds");
    syslog(LOG_ERR," FILESAMPLERATE How often to open/read/write/close test file");
    syslog(LOG_ERR," MONITORFILE Path to test file");
    syslog(LOG_ERR," MINIMUM_AVAILABLE_MEM Memory Available");
    syslog(LOG_ERR," MINIMUM_FREEHIGH Available high memory");
    syslog(LOG_ERR," MEMSAMPLERATE Sample rate to test memory");
    syslog(LOG_ERR," MEMSAMPLES Samples to accumulate");
    syslog(LOG_ERR," MEMFAILEDSAMPLES Number of samples below threshold before reboot");
    syslog(LOG_ERR," SHUTDOWNTIMEOUT Set watchdog timeout seconds at SIGTERM signal");
    exit(1);
}

/*
 * Returns true if earlier is before or matches later.
 *
 * later   - the time being tested (normally "now")
 * earlier - the deadline
 *
 * Returns 1 when later >= earlier (comparing seconds first, then
 * nanoseconds), otherwise 0.  Neither argument is modified.
 */
int timed_out(const struct timespec *later, const struct timespec *earlier)
{
    /* Seconds decide the ordering unless they are equal. */
    if (later->tv_sec != earlier->tv_sec)
        return later->tv_sec > earlier->tv_sec;

    /* Same second: the deadline is reached once the nanoseconds catch up. */
    return later->tv_nsec >= earlier->tv_nsec;
}

// Return the soonest of t1, t2, and t3. Prefers t3 to t2. Prefers t2 to t1.
void min_time(struct timespec *t1, struct timespec *t2, struct timespec *t3, struct timespec **soonest) { if(timed_out(t1,t2)) *soonest = t2; else *soonest = t1; if(timed_out(*soonest,t3)) *soonest = t3; } char *pidfile; int feed; // Seconds between feedings of the watchdog int filesamplerate; unsigned long long fileval_longlong = 0x5555555555555555ULL; int timeout; char *monitorfile; unsigned long long minimum_available_mem; unsigned long long minimum_freehigh; int memsamplerate; int memsamples; int memfailedsamples; char pidstr[32]; char *available_samples; char *freehigh_samples; int memsample_idx; struct timespec t0; struct timespec feed_time; // When to next feed the watchdog struct timespec file_time; // When to next write to the file struct timespec mem_time; // When to next test memory int error_count; // Cumulative errors int shutdown_timeout; int devfd = -1; void sigterm_handler(int sig) { struct sigaction hdlr_action; // Shutdown could be long if doing a flash upgrade if(devfd != -1) ioctl(devfd, WDIOC_SETTIMEOUT, &shutdown_timeout); /* syslog is not safe in a signal handler */ memset(&hdlr_action,0,sizeof hdlr_action); hdlr_action.sa_handler = SIG_DFL; sigaction(SIGTERM,&hdlr_action,NULL); kill(getpid(),SIGTERM); _exit(0); } int main(int argc, char *argv[]) { int result, count; char *tmp; int pidfd,monitorfd; char *watchdogdevicepath; struct timespec t0, *stime = NULL; struct sysinfo info; struct sigaction action; openlog(name,LOG_PERROR,LOG_DAEMON); memset(&action,0,sizeof action); action.sa_handler = sigterm_handler; sigaction(SIGTERM,&action,NULL); if (argc < 3) usage(); timeout = atoi(argv[2]); if (timeout < 1) { syslog(LOG_ERR,"feedwatchdog: timeout must be at least 1"); usage(); } pidfile = secure_getenv("PIDFILE"); if(pidfile == NULL) { pidfile = "/run/softdog-mon.pid"; syslog(LOG_ERR,"feedwatchdog: pidfile is NULL"); pidfd = open(pidfile,O_WRONLY|O_CREAT|O_TRUNC,0644); } else { syslog(LOG_ERR,"feedwatchdog: pidfile is %s",pidfile); pidfd = 
open(pidfile,O_WRONLY|O_CREAT|O_TRUNC,0644); } if (pidfd == -1) { syslog(LOG_ERR,"PIDFILE: Could not open %s: %d: %s",pidfile,errno,strerror(errno)); usage(); } tmp = secure_getenv("FEED"); if (tmp == NULL) { syslog(LOG_ERR,"FEED is missing"); usage(); } feed = atoi(tmp); tmp = secure_getenv("FILESAMPLERATE"); if (tmp == NULL) { syslog(LOG_ERR,"FILESAMPLERATE is missing"); usage(); } filesamplerate = atoi(tmp); // Prime the monitor file monitorfile = secure_getenv("MONITORFILE"); monitorfd = open(monitorfile,O_WRONLY|O_CREAT|O_SYNC|O_TRUNC,0644); if (monitorfd == -1) { syslog(LOG_ERR,"MONITORFILE: Could not open %s: %d: %s",monitorfile,errno,strerror(errno)); usage(); } result = write(monitorfd,&fileval_longlong,sizeof fileval_longlong); if(result != 8) { syslog(LOG_ERR,"MONITORFILE: Could not write %s: %d: %s",monitorfile,errno,strerror(errno)); usage(); } close(monitorfd); tmp = secure_getenv("MINIMUM_AVAILABLE_MEM"); if (tmp == NULL) { syslog(LOG_ERR,"MINIMUM_AVAILABLE_MEM is missing"); usage(); } minimum_available_mem = strtoull(tmp,NULL,0); tmp = secure_getenv("MINIMUM_FREEHIGH"); if (tmp == NULL) { syslog(LOG_ERR,"MINIMUM_FREEHIGH is missing"); usage(); } minimum_freehigh = strtoull(tmp,NULL,0); tmp = secure_getenv("MEMSAMPLERATE"); if (tmp == NULL) { syslog(LOG_ERR,"MEMSAMPLERATE is missing"); usage(); } memsamplerate = atoi(tmp); tmp = secure_getenv("MEMSAMPLES"); if (tmp == NULL) { syslog(LOG_ERR,"MEMSAMPLES is missing"); usage(); } memsamples = atoi(tmp); available_samples = (char *)calloc(memsamples,sizeof available_samples[0]); if(available_samples == NULL) { syslog(LOG_ERR,"ERROR: Out of memory"); exit(1); } freehigh_samples = (char *)calloc(memsamples,sizeof freehigh_samples[0]); if(available_samples == NULL) { syslog(LOG_ERR,"ERROR: Out of memory"); exit(1); } memsample_idx = 0; tmp = secure_getenv("MEMFAILEDSAMPLES"); if (tmp == NULL) { syslog(LOG_ERR,"MEMFAILEDSAMPLES is missing"); usage(); } memfailedsamples = atoi(tmp); tmp = 
secure_getenv("SHUTDOWNTIMEOUT"); if (tmp == NULL) { syslog(LOG_ERR,"SHUTDOWNTIMEOUT is missing"); usage(); } shutdown_timeout = atoi(tmp); watchdogdevicepath = argv[1]; syslog(LOG_ALERT,"%s: Version %s",name,VERSION); syslog(LOG_ALERT,"All times in seconds, sizes in bytes"); syslog(LOG_ALERT,"Watchdog Device: %s",watchdogdevicepath); syslog(LOG_ALERT,"Watchdog Timout in Seconds: %d",timeout); syslog(LOG_ALERT,"PID File: %s",pidfile); syslog(LOG_ALERT,"Feed: %d",feed); syslog(LOG_ALERT,"File Sample Rate %d",filesamplerate); syslog(LOG_ALERT,"Monitor File (I/O health check) %s",monitorfile); syslog(LOG_ALERT,"Minimum Available Memory %llu",minimum_available_mem); syslog(LOG_ALERT,"Minimum Free High Memory %llu",minimum_freehigh); syslog(LOG_ALERT,"Memory Sample Rate %d",memsamplerate); syslog(LOG_ALERT,"Memory Samples Collected %d",memsamples); syslog(LOG_ALERT,"Memory Failed Samples (maximum) %d",memfailedsamples); syslog(LOG_ALERT,"Shudown timeout %d",shutdown_timeout); result = clock_gettime(CLOCK_MONOTONIC, &t0); if (result == -1) { syslog(LOG_ERR,"ERROR: System Error: The system is not supporting MONOTONIC time"); exit(1); } result = fork(); switch(result) { case -1: syslog(LOG_ERR,"ERROR: Could not fork: %d: %s", errno, strerror(errno)); break; case 0: close(0); close(1); close(2); setsid(); break; default: _exit(0); } count = snprintf(pidstr,sizeof pidstr,"%u",(int)getpid()); if ((count > 0) && (pidfd >= 0) && (count < sizeof pidstr)) { pidstr[count] = '\n'; result = write(pidfd,pidstr,count+1); if (result == -1) syslog(LOG_ERR,"ERROR: Could not write to PID file: %d: %s", errno, strerror(errno)); } close(pidfd); devfd = open(argv[1],O_RDWR); if (devfd == -1) { syslog(LOG_ERR,"feedwatchdog: Could not open %s: %s",argv[1],strerror(errno)); usage(); } clock_gettime(CLOCK_MONOTONIC, &t0); feed_time = file_time = mem_time = t0; syslog(LOG_DEBUG,"****INITIAL t0: size %d:%d %lu:%lu mem_time %lu:%lu",sizeof t0.tv_sec,sizeof 
t0.tv_nsec,t0.tv_sec,t0.tv_nsec,mem_time.tv_sec,mem_time.tv_nsec); ioctl(devfd, WDIOC_SETTIMEOUT, &timeout); syslog(LOG_ERR,"The timeout was set to %d seconds",timeout); while (1) { clock_gettime(CLOCK_MONOTONIC, &t0); if (timed_out(&t0,&feed_time)) { ioctl(devfd, WDIOC_KEEPALIVE, 0); feed_time.tv_sec += feed; // Next time to feed the watchdog } if (timed_out(&t0,&file_time)) { unsigned long long readval; monitorfd = open(monitorfile,O_RDWR|O_SYNC); if(monitorfd == -1) { syslog(LOG_ERR,"ERROR: Could not open %s: %d: %s",monitorfile,errno,strerror(errno)); error_count++; } else { result = read(monitorfd,&readval,sizeof readval); if(result == -1) { syslog(LOG_ERR,"ERROR: Could not read %s: %d: %s",monitorfile,errno,strerror(errno)); error_count++; } if (result != sizeof readval) { syslog(LOG_ERR,"ERROR: %s: Expected to read %d, but read %d bytes",monitorfile,sizeof readval,result); error_count++; } if (readval != fileval_longlong) { syslog(LOG_ERR,"ERROR: %s: Expected to read %llu, but read %llu value",monitorfile,fileval_longlong,readval); error_count++; } fileval_longlong++; result = lseek(monitorfd,0,SEEK_SET); if(result == -1) { syslog(LOG_ERR,"ERROR: Could not rewind %s: %d: %s",monitorfile,errno,strerror(errno)); error_count++; } result = write(monitorfd,&fileval_longlong,sizeof fileval_longlong); if(result == -1) { syslog(LOG_ERR,"ERROR: Could write %s: %d: %s",monitorfile,errno,strerror(errno)); error_count++; } if(result != sizeof fileval_longlong) { syslog(LOG_ERR,"ERROR: %s: Expected to write %d, but wrote %d bytes",monitorfile,sizeof fileval_longlong,result); error_count++; } if(result == sizeof fileval_longlong) file_time.tv_sec += filesamplerate; close(monitorfd); } // Good file descriptor for monitor file. 
} // Monitor file timeout (write to monitor file) // Memory checks if(timed_out(&t0,&mem_time)) { unsigned long long testval; int i; int acount,fcount; syslog(LOG_DEBUG,"t0: %lu:%lu mem_time %lu:%lu",t0.tv_sec,t0.tv_nsec,mem_time.tv_sec,mem_time.tv_nsec); sysinfo(&info); testval = (info.freeram * info.mem_unit); available_samples[memsample_idx] = (testval < minimum_available_mem); testval = (info.freehigh * info.mem_unit); freehigh_samples[memsample_idx] = (testval < minimum_freehigh); memsample_idx++; memsample_idx = (memsample_idx % memsamples); acount = fcount = 0; for (i=0;i memfailedsamples) { syslog(LOG_ERR,"Memory Available failure: %llu, should be at least %llu",testval,minimum_available_mem); exit(1); } if (fcount > memfailedsamples) { syslog(LOG_ERR,"High Memory failure: %llu, should be at least %llu",testval,minimum_freehigh); exit(1); } mem_time.tv_sec += memsamplerate; } // Time to check memory. // How long do we sleep? min_time(&mem_time,&file_time,&feed_time,&stime); clock_nanosleep(CLOCK_MONOTONIC,TIMER_ABSTIME,stime,NULL); } // Loop forever. return 100; /* NOTREACHED */ }