summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Klug <john.klug@multitech.com>2021-01-19 17:21:32 -0600
committerJohn Klug <john.klug@multitech.com>2021-01-19 17:21:32 -0600
commitf8d0b344ae1b2dc3894c1a597c0565911b762742 (patch)
tree79e8f174ab88cd9c00258956f2cb2e6bed548956
downloadsoftdog-mon-master.tar.gz
softdog-mon-master.tar.bz2
softdog-mon-master.zip
softdog-mon for monitoring a system using kernel module "softdog"HEAD0.1master
-rw-r--r--AUTHORS1
-rw-r--r--COPYING23
-rw-r--r--ChangeLog1
-rw-r--r--INSTALL237
-rw-r--r--Makefile.am3
-rw-r--r--NEWS0
-rw-r--r--README84
-rw-r--r--configure.ac31
-rw-r--r--src/Makefile.am6
-rw-r--r--src/hog.c35
-rw-r--r--src/softdog-mon.c372
11 files changed, 793 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..15d3bf8
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Multi-Tech Systems
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..39f0c1f
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,23 @@
+
+MIT License
+
+Copyright (c) <2021> <Multi-Tech Systems>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1 @@
+
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..d3c5b40
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,237 @@
+Installation Instructions
+*************************
+
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006, 2007 Free Software Foundation, Inc.
+
+This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+
+Basic Installation
+==================
+
+Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package. The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system.
+
+ Running `configure' might take a while. While running, it prints
+ some messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+ 6. Often, you can also type `make uninstall' to remove the installed
+ files again.
+
+Compilers and Options
+=====================
+
+Some systems require unusual options for compilation or linking that the
+`configure' script does not know about. Run `./configure --help' for
+details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you can use GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory. After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+Installation Names
+==================
+
+By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc. You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+There may be some features `configure' cannot figure out automatically,
+but needs to determine by the type of machine the package will run on.
+Usually, assuming the package is built to be run on the _same_
+architectures, `configure' can figure that out, but if it prints a
+message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+If you want to set default values for `configure' scripts to share, you
+can create a site shell script called `config.site' that gives default
+values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf bug. Until the bug is fixed you can use this workaround:
+
+ CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+`configure' recognizes the following options to control how it operates.
+
+`--help'
+`-h'
+ Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
+
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..8f99788
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,3 @@
+# Top-level automake input: the only subdirectory descended into is src/.
+AUTOMAKE_OPTIONS = gnu
+SUBDIRS = src
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/NEWS
diff --git a/README b/README
new file mode 100644
index 0000000..6d0e22f
--- /dev/null
+++ b/README
@@ -0,0 +1,84 @@
+Configurable monitor program that uses softdog.
+
+This program (softdog-mon) must be running all
+the time once it is started, or the
+system will reset itself without a shutdown.
+The problems detected may prevent shutdown, so a
+reset is safer.
+
+The SHUTDOWNTIMEOUT value is the time to allow for
+shutdown. Since firmware updates are done during
+shutdown, this should be the worst case time for
+shutdown.
+
+Variables passed through the environment:
+
+# Monitor program will have 1 second granularity. Fixed.
+# All times are in seconds.
+
+# Hardware watchdog is found first, which is watchdog0.
+WATCHDOG = /dev/watchdog1
+
+# Nice value -20, is highest priority for a user program, 19 is lowest.
+NICE = -20
+
+# Watchdog timeout in seconds
+TIMEOUT = 60
+
+# How often to feed in seconds
+FEED = 10
+
+# File is synchronously opened/read/written/closed every 30 seconds
+FILESAMPLERATE = 30
+
+# File to be read/written
+# If I/O hangs, the TIMEOUT value is the maximum seconds until we
+# reset the device.
+MONITORFILE = /media/card/.softdog_monitor
+
+# Minimum available system memory in bytes
+MINIMUM_AVAILABLE_MEM = 3000000
+
+# Minimum free high memory
+MINIMUM_FREEHIGH = 0
+
+# Rate at which we sample available memory
+MEMSAMPLERATE = 3
+
+# last samples saved
+MEMSAMPLES = 100
+
+# maximum number of samples failed in last samples saved
+MEMFAILEDSAMPLES = 20
+
+# Allow time for flash upgrade during shutdown
+# This happens when a SIGTERM signal is received.
+# So shutdown has this many seconds to complete.
+SHUTDOWNTIMEOUT=600
+
+There is an additional test program called
+hog. This can be used to acquire memory and kernel
+resources.
+
+hog 4750000
+
+This will start five processes with 4750000 bytes
+of memory. The idea is to trigger the watchdog.
+
+ hog 4750000
+
+Creates five processes with the amount of memory
+specified.
+
+In a typical test:
+
+Log into the device several times with ssh, and do sudo -s
+and acquire a root shell.
+
+As root start the program hog, the amount of memory required
+will depend on the size of the programs typically running.
+
+Start top on the several screens logged in. Try to get the
+available memory below 3MB. Once 20 samples have failed
+the device will reboot.
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..5e6a888
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,31 @@
+dnl Autoconf input for softdog-mon.
+AC_INIT([softdog-mon],[0.1.0])
+AC_CONFIG_SRCDIR([src/softdog-mon.c])
+dnl AM_CONFIG_HEADER was removed in Automake 1.13; AC_CONFIG_HEADERS is the
+dnl supported spelling and generates the same config.h.
+AC_CONFIG_HEADERS([config.h])
+AM_INIT_AUTOMAKE
+
+
+AC_PROG_CC
+AC_PROG_CXX
+AC_PROG_INSTALL
+AC_PROG_MAKE_SET
+
+AC_HEADER_STDC
+
+dnl Every header included by src/softdog-mon.c is required; fail early if
+dnl any of them is missing.
+AC_CHECK_HEADERS([ \
+sys/types.h \
+sys/stat.h \
+sys/ioctl.h \
+sys/sysinfo.h \
+linux/watchdog.h \
+fcntl.h \
+stdio.h \
+string.h \
+errno.h \
+stdlib.h \
+unistd.h \
+syslog.h \
+time.h \
+signal.h \
+], [],[AC_MSG_ERROR([
+required header missing])])
+
+AC_DEFINE([DEBUG], 0, [set to 1 to enable debug])
+
+AC_CONFIG_FILES([Makefile src/Makefile])
+AC_OUTPUT
+
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..dba3d24
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,6 @@
+## Process this file with automake to produce Makefile.in
+AUTOMAKE_OPTIONS = gnu
+# Compile every C source with the common warning set enabled.
+AM_CFLAGS = -Wall
+
+# Both programs are installed into sbindir.  No *_SOURCES variables are set,
+# so automake's default of <program>.c applies (softdog-mon.c and hog.c).
+sbin_PROGRAMS = softdog-mon hog
+
diff --git a/src/hog.c b/src/hog.c
new file mode 100644
index 0000000..d887f4d
--- /dev/null
+++ b/src/hog.c
@@ -0,0 +1,35 @@
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+/*
+ * Memory hog test tool.
+ *
+ * Usage: hog <bytes>
+ *
+ * Five times in a row the process allocates <bytes> bytes, writes to every
+ * byte so the pages are really backed by memory, and then forks.  After the
+ * loop each process rewrites its most recent buffer and sleeps in pause()
+ * until it is killed by a signal.
+ *
+ * NOTE(review): parent and child both continue the loop after fork(), so up
+ * to 2^5 processes end up running, not the five that the README describes --
+ * confirm which behaviour is intended.
+ *
+ * Earlier buffers are deliberately never freed; holding memory is the point.
+ */
+int
+main(int argc, const char *argv[])
+{
+	char *p = NULL;
+	int mallocsize;
+	int pid;
+	int j, k;
+
+	if (argc < 2) {
+		fprintf(stderr,"Need malloc size parameter\n");
+		exit(1);
+	}
+	mallocsize = atoi(argv[1]);
+	if (mallocsize <= 0) {
+		/* A negative int would turn into an enormous size_t in malloc(). */
+		fprintf(stderr,"malloc size must be a positive number of bytes\n");
+		exit(1);
+	}
+	for (k = 0; k < 5; k++) {
+		p = malloc(mallocsize);
+		if (p == NULL) {
+			fprintf(stderr,"malloc of %d bytes failed\n",mallocsize);
+		} else {
+			for (j = 0; j < mallocsize; j++)
+				p[j] = j;
+		}
+		pid = fork();
+		if (pid == -1)
+			perror("fork");
+		else if (pid)
+			fprintf(stderr,"pid=%d\n",pid);
+	}
+
+	/*
+	 * Dirty the last buffer again with a per-process value so the pages
+	 * shared copy-on-write after fork() become private to this process.
+	 * Skip it when the last allocation failed.
+	 */
+	pid = getpid();
+	if (p != NULL) {
+		for (j = 0; j < mallocsize; j++)
+			p[j] = pid;
+	}
+
+	pause();
+	// NOTREACHED
+	return (0);
+}
diff --git a/src/softdog-mon.c b/src/softdog-mon.c
new file mode 100644
index 0000000..75591dd
--- /dev/null
+++ b/src/softdog-mon.c
@@ -0,0 +1,372 @@
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <linux/watchdog.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <config.h>
+#include <errno.h>
+#include <syslog.h>
+#include <time.h>
+#include <signal.h>
+
+/* This program does everything in seconds granularity.
+ * The time to accomplish a task has the number of nanoseconds
+ * added in to specify the time for the event to be accomplished.
+ */
+
+char *name = PACKAGE;
+
+/*
+ * Write the program banner, the command-line synopsis and the list of
+ * environment settings to syslog at LOG_ERR, then terminate with status 1.
+ * Never returns.
+ */
+void usage()
+{
+	static const char *const help[] = {
+		"USAGE: feedwatchdog device timeout",
+		"Times below are in seconds",
+		"ENVIRONMENT:",
+		" PIDFILE",
+		" FEED How often to feed watchdog in seconds",
+		" FILESAMPLERATE How often to open/read/write/close test file",
+		" MONITORFILE Path to test file",
+		" MINIMUM_AVAILABLE_MEM Memory Available",
+		" MINIMUM_FREEHIGH Available high memory",
+		" MEMSAMPLERATE Sample rate to test memory",
+		" MEMSAMPLES Samples to accumulate",
+		" MEMFAILEDSAMPLES Number of samples below threshold before reboot",
+		" SHUTDOWNTIMEOUT Set watchdog timeout seconds at SIGTERM signal",
+	};
+	size_t line;
+
+	syslog(LOG_ERR, "%s (" PACKAGE ") " VERSION " (" __DATE__ " " __TIME__ ")\n", name);
+	syslog(LOG_ERR, "Copyright (C) 2021 by Multi-Tech Systems\n");
+
+	/* One syslog record per help line, in table order. */
+	for (line = 0; line < sizeof help / sizeof help[0]; line++)
+		syslog(LOG_ERR, "%s", help[line]);
+
+	exit(1);
+}
+
+
+// Returns 1 when `later` is at or past `earlier`, otherwise 0.
+// Seconds decide unless they are equal; then nanoseconds break the tie.
+int
+timed_out(struct timespec *later, struct timespec *earlier)
+{
+	if (later->tv_sec != earlier->tv_sec)
+		return later->tv_sec > earlier->tv_sec;
+
+	return later->tv_nsec >= earlier->tv_nsec;
+}
+
+
+// Store in *soonest whichever of t1, t2, t3 is earliest.
+// On a tie t3 wins over t2, and t2 wins over t1.
+void
+min_time(struct timespec *t1, struct timespec *t2, struct timespec *t3, struct timespec **soonest) {
+	struct timespec *pick = timed_out(t1,t2) ? t2 : t1;
+
+	if (timed_out(pick,t3))
+		pick = t3;
+	*soonest = pick;
+}
+
+
+/* Run-time configuration and state shared between main() and the signal handler. */
+char *pidfile; // Path of the PID file (PIDFILE, or a built-in default)
+int feed; // Seconds between feedings of the watchdog
+int filesamplerate; // Seconds between monitor-file checks (FILESAMPLERATE)
+unsigned long long fileval_longlong = 0x5555555555555555ULL; // Value expected in the monitor file; incremented on every check
+int timeout; // Watchdog timeout in seconds, taken from argv[2]
+char *monitorfile; // Path of the monitor file (MONITORFILE)
+unsigned long long minimum_available_mem; // Threshold in bytes (MINIMUM_AVAILABLE_MEM)
+unsigned long long minimum_freehigh; // Threshold in bytes (MINIMUM_FREEHIGH)
+int memsamplerate; // Seconds between memory checks (MEMSAMPLERATE)
+int memsamples; // Number of slots in each sample ring (MEMSAMPLES)
+int memfailedsamples; // Failed samples tolerated before exiting (MEMFAILEDSAMPLES)
+char pidstr[32]; // Scratch buffer for the text written to the PID file
+char *available_samples; // Ring of flags: 1 when free RAM was below minimum_available_mem
+char *freehigh_samples; // Ring of flags: 1 when free high memory was below minimum_freehigh
+int memsample_idx; // Next slot to overwrite in both rings
+struct timespec t0; // Not read by the visible code (main keeps its own timestamp)
+struct timespec feed_time; // When to next feed the watchdog
+struct timespec file_time; // When to next write to the file
+struct timespec mem_time; // When to next test memory
+int error_count; // Cumulative errors; only incremented, nothing visible reads it
+int shutdown_timeout; // Watchdog timeout applied on SIGTERM (SHUTDOWNTIMEOUT)
+int devfd = -1; // Watchdog device descriptor, -1 until opened
+
+/*
+ * SIGTERM handler.
+ *
+ * Stretches the watchdog timeout to shutdown_timeout (if the device is
+ * open), puts SIGTERM back to its default action, sends SIGTERM to this
+ * process and finally calls _exit(0).
+ */
+void
+sigterm_handler(int sig)
+{
+	struct sigaction restore = { .sa_handler = SIG_DFL };
+
+	// Shutdown could be long if doing a flash upgrade
+	if (devfd != -1)
+		ioctl(devfd, WDIOC_SETTIMEOUT, &shutdown_timeout);
+
+	/* syslog is not safe in a signal handler */
+	sigaction(SIGTERM, &restore, NULL);
+	kill(getpid(), SIGTERM);
+	_exit(0);
+}
+
+/*
+ * Return the value of a mandatory environment variable.
+ * When it is unset, logs "<var> is missing" and exits through usage().
+ */
+static char *
+require_env(const char *var)
+{
+	char *val = secure_getenv(var);
+
+	if (val == NULL) {
+		syslog(LOG_ERR,"%s is missing",var);
+		usage();
+	}
+	return val;
+}
+
+/*
+ * Like require_env(), but converts the value with atoi() and insists on a
+ * result of at least 1.  Used for intervals and sizes where zero would
+ * make the main loop spin or divide by zero.
+ */
+static int
+require_positive_env(const char *var)
+{
+	int val = atoi(require_env(var));
+
+	if (val < 1) {
+		syslog(LOG_ERR,"%s must be at least 1",var);
+		usage();
+	}
+	return val;
+}
+
+/*
+ * One monitor-file cycle: open, read back the previously written value,
+ * compare it with fileval_longlong, then increment the value and write it
+ * back synchronously.  Every problem is logged and counted in error_count;
+ * nothing here terminates the program.
+ */
+static void
+check_monitor_file(void)
+{
+	unsigned long long readval = 0;
+	ssize_t n;
+	int fd;
+
+	fd = open(monitorfile,O_RDWR|O_SYNC);
+	if (fd == -1) {
+		syslog(LOG_ERR,"ERROR: Could not open %s: %d: %s",monitorfile,errno,strerror(errno));
+		error_count++;
+		return;
+	}
+
+	/* Only compare the value when the read actually delivered all of it. */
+	n = read(fd,&readval,sizeof readval);
+	if (n == -1) {
+		syslog(LOG_ERR,"ERROR: Could not read %s: %d: %s",monitorfile,errno,strerror(errno));
+		error_count++;
+	} else if (n != (ssize_t)sizeof readval) {
+		syslog(LOG_ERR,"ERROR: %s: Expected to read %zu, but read %zd bytes",monitorfile,sizeof readval,n);
+		error_count++;
+	} else if (readval != fileval_longlong) {
+		syslog(LOG_ERR,"ERROR: %s: Expected to read %llu, but read %llu value",monitorfile,fileval_longlong,readval);
+		error_count++;
+	}
+
+	fileval_longlong++;
+	if (lseek(fd,0,SEEK_SET) == (off_t)-1) {
+		syslog(LOG_ERR,"ERROR: Could not rewind %s: %d: %s",monitorfile,errno,strerror(errno));
+		error_count++;
+	}
+	n = write(fd,&fileval_longlong,sizeof fileval_longlong);
+	if (n == -1) {
+		syslog(LOG_ERR,"ERROR: Could not write %s: %d: %s",monitorfile,errno,strerror(errno));
+		error_count++;
+	} else if (n != (ssize_t)sizeof fileval_longlong) {
+		syslog(LOG_ERR,"ERROR: %s: Expected to write %zu, but wrote %zd bytes",monitorfile,sizeof fileval_longlong,n);
+		error_count++;
+	}
+	close(fd);
+}
+
+/*
+ * Take one memory sample, record in the two rings whether free RAM and
+ * free high memory are below their thresholds, and exit(1) when more than
+ * memfailedsamples entries of either ring are flagged.  Exiting stops the
+ * watchdog feeding.
+ */
+static void
+check_memory(void)
+{
+	struct sysinfo info;
+	unsigned long long available, freehigh;
+	int acount = 0, fcount = 0;
+	int i;
+
+	if (sysinfo(&info) == -1) {
+		syslog(LOG_ERR,"ERROR: sysinfo failed: %d: %s",errno,strerror(errno));
+		error_count++;
+		return;
+	}
+
+	/* Widen before multiplying so the product cannot wrap where long is 32 bits. */
+	available = (unsigned long long)info.freeram * info.mem_unit;
+	freehigh = (unsigned long long)info.freehigh * info.mem_unit;
+
+	available_samples[memsample_idx] = (available < minimum_available_mem);
+	freehigh_samples[memsample_idx] = (freehigh < minimum_freehigh);
+	memsample_idx = (memsample_idx + 1) % memsamples;
+
+	for (i = 0; i < memsamples; i++) {
+		acount += available_samples[i];
+		fcount += freehigh_samples[i];
+	}
+	syslog(LOG_DEBUG,"mem samples: acount:%d fcount:%d memsamples:%d",acount,fcount,memsamples);
+
+	if (acount > memfailedsamples) {
+		syslog(LOG_ERR,"Memory Available failure: %llu, should be at least %llu",available,minimum_available_mem);
+		exit(1);
+	}
+	if (fcount > memfailedsamples) {
+		syslog(LOG_ERR,"High Memory failure: %llu, should be at least %llu",freehigh,minimum_freehigh);
+		exit(1);
+	}
+}
+
+/*
+ * softdog-mon entry point.
+ *
+ *   argv[1]  watchdog device path
+ *   argv[2]  watchdog timeout in seconds (at least 1)
+ *
+ * Everything else is read from the environment (see usage()).  After the
+ * configuration is validated and logged the process forks into the
+ * background, writes its PID file, opens the watchdog and loops forever:
+ * feed the watchdog, exercise the monitor file, sample memory, then sleep
+ * until the earliest of the three deadlines.  Any configuration error ends
+ * in usage(), which exits with status 1.
+ */
+int
+main(int argc, char *argv[])
+{
+	int result, count;
+	int pidfd, monitorfd, nullfd;
+	ssize_t written;
+	char *watchdogdevicepath;
+	struct timespec now, *stime = NULL;
+	struct sigaction action;
+
+	openlog(name,LOG_PERROR,LOG_DAEMON);
+
+	memset(&action,0,sizeof action);
+	action.sa_handler = sigterm_handler;
+	sigaction(SIGTERM,&action,NULL);
+
+	if (argc < 3)
+		usage();
+	watchdogdevicepath = argv[1];
+	timeout = atoi(argv[2]);
+	if (timeout < 1) {
+		syslog(LOG_ERR,"feedwatchdog: timeout must be at least 1");
+		usage();
+	}
+
+	pidfile = secure_getenv("PIDFILE");
+	if (pidfile == NULL) {
+		pidfile = "/run/softdog-mon.pid";
+		syslog(LOG_ERR,"feedwatchdog: pidfile is NULL");
+	} else {
+		syslog(LOG_ERR,"feedwatchdog: pidfile is %s",pidfile);
+	}
+	pidfd = open(pidfile,O_WRONLY|O_CREAT|O_TRUNC,0644);
+	if (pidfd == -1) {
+		syslog(LOG_ERR,"PIDFILE: Could not open %s: %d: %s",pidfile,errno,strerror(errno));
+		usage();
+	}
+
+	feed = require_positive_env("FEED");
+	filesamplerate = require_positive_env("FILESAMPLERATE");
+
+	// Prime the monitor file
+	monitorfile = require_env("MONITORFILE");
+	monitorfd = open(monitorfile,O_WRONLY|O_CREAT|O_SYNC|O_TRUNC,0644);
+	if (monitorfd == -1) {
+		syslog(LOG_ERR,"MONITORFILE: Could not open %s: %d: %s",monitorfile,errno,strerror(errno));
+		usage();
+	}
+	written = write(monitorfd,&fileval_longlong,sizeof fileval_longlong);
+	if (written != (ssize_t)sizeof fileval_longlong) {
+		syslog(LOG_ERR,"MONITORFILE: Could not write %s: %d: %s",monitorfile,errno,strerror(errno));
+		usage();
+	}
+	close(monitorfd);
+
+	minimum_available_mem = strtoull(require_env("MINIMUM_AVAILABLE_MEM"),NULL,0);
+	minimum_freehigh = strtoull(require_env("MINIMUM_FREEHIGH"),NULL,0);
+	memsamplerate = require_positive_env("MEMSAMPLERATE");
+
+	/* memsamples is the ring size and a modulus, so it must be positive. */
+	memsamples = require_positive_env("MEMSAMPLES");
+	available_samples = calloc(memsamples,sizeof available_samples[0]);
+	if (available_samples == NULL) {
+		syslog(LOG_ERR,"ERROR: Out of memory");
+		exit(1);
+	}
+	freehigh_samples = calloc(memsamples,sizeof freehigh_samples[0]);
+	if (freehigh_samples == NULL) {
+		syslog(LOG_ERR,"ERROR: Out of memory");
+		exit(1);
+	}
+	memsample_idx = 0;
+
+	memfailedsamples = atoi(require_env("MEMFAILEDSAMPLES"));
+	shutdown_timeout = atoi(require_env("SHUTDOWNTIMEOUT"));
+
+	syslog(LOG_ALERT,"%s: Version %s",name,VERSION);
+	syslog(LOG_ALERT,"All times in seconds, sizes in bytes");
+	syslog(LOG_ALERT,"Watchdog Device: %s",watchdogdevicepath);
+	syslog(LOG_ALERT,"Watchdog Timout in Seconds: %d",timeout);
+	syslog(LOG_ALERT,"PID File: %s",pidfile);
+	syslog(LOG_ALERT,"Feed: %d",feed);
+	syslog(LOG_ALERT,"File Sample Rate %d",filesamplerate);
+	syslog(LOG_ALERT,"Monitor File (I/O health check) %s",monitorfile);
+	syslog(LOG_ALERT,"Minimum Available Memory %llu",minimum_available_mem);
+	syslog(LOG_ALERT,"Minimum Free High Memory %llu",minimum_freehigh);
+	syslog(LOG_ALERT,"Memory Sample Rate %d",memsamplerate);
+	syslog(LOG_ALERT,"Memory Samples Collected %d",memsamples);
+	syslog(LOG_ALERT,"Memory Failed Samples (maximum) %d",memfailedsamples);
+	syslog(LOG_ALERT,"Shudown timeout %d",shutdown_timeout);
+
+	result = clock_gettime(CLOCK_MONOTONIC, &now);
+	if (result == -1) {
+		syslog(LOG_ERR,"ERROR: System Error: The system is not supporting MONOTONIC time");
+		exit(1);
+	}
+
+	/* Go to the background; on fork failure keep running in the foreground. */
+	result = fork();
+	switch(result)
+	{
+	case -1:
+		syslog(LOG_ERR,"ERROR: Could not fork: %d: %s", errno, strerror(errno));
+		break;
+	case 0:
+		/*
+		 * Point stdin/stdout/stderr at /dev/null instead of leaving
+		 * them closed: otherwise later open() calls (watchdog,
+		 * monitor file) can be handed descriptor 2, which syslog's
+		 * LOG_PERROR option writes log text to.
+		 */
+		nullfd = open("/dev/null",O_RDWR);
+		if (nullfd == -1) {
+			close(0); close(1); close(2);
+		} else {
+			dup2(nullfd,0);
+			dup2(nullfd,1);
+			dup2(nullfd,2);
+			if (nullfd > 2)
+				close(nullfd);
+		}
+		setsid();
+		break;
+	default:
+		_exit(0);
+	}
+
+	/* Written after the fork so the file holds the PID of the daemon. */
+	count = snprintf(pidstr,sizeof pidstr,"%d\n",(int)getpid());
+	if ((count > 0) && ((size_t)count < sizeof pidstr)) {
+		if (write(pidfd,pidstr,count) == -1)
+			syslog(LOG_ERR,"ERROR: Could not write to PID file: %d: %s", errno, strerror(errno));
+	}
+	close(pidfd);
+
+	devfd = open(watchdogdevicepath,O_RDWR);
+	if (devfd == -1) {
+		syslog(LOG_ERR,"feedwatchdog: Could not open %s: %s",watchdogdevicepath,strerror(errno));
+		usage();
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	feed_time = file_time = mem_time = now;
+	syslog(LOG_DEBUG,"****INITIAL t0: size %zu:%zu %ld:%ld mem_time %ld:%ld",sizeof now.tv_sec,sizeof now.tv_nsec,(long)now.tv_sec,(long)now.tv_nsec,(long)mem_time.tv_sec,(long)mem_time.tv_nsec);
+
+	if (ioctl(devfd, WDIOC_SETTIMEOUT, &timeout) == -1)
+		syslog(LOG_ERR,"ERROR: Could not set watchdog timeout: %d: %s",errno,strerror(errno));
+	syslog(LOG_ERR,"The timeout was set to %d seconds",timeout);
+
+
+	while (1) {
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		if (timed_out(&now,&feed_time)) {
+			ioctl(devfd, WDIOC_KEEPALIVE, 0);
+			feed_time.tv_sec += feed; // Next time to feed the watchdog
+		}
+		if (timed_out(&now,&file_time)) {
+			check_monitor_file();
+			/*
+			 * Reschedule even after a failure; leaving the deadline
+			 * in the past would make the sleep below return at
+			 * once and spin the loop.
+			 */
+			file_time.tv_sec += filesamplerate;
+		} // Monitor file timeout (write to monitor file)
+
+		// Memory checks
+		if (timed_out(&now,&mem_time)) {
+			syslog(LOG_DEBUG,"t0: %ld:%ld mem_time %ld:%ld",(long)now.tv_sec,(long)now.tv_nsec,(long)mem_time.tv_sec,(long)mem_time.tv_nsec);
+			check_memory();
+			mem_time.tv_sec += memsamplerate;
+		} // Time to check memory.
+
+		// How long do we sleep?
+		min_time(&mem_time,&file_time,&feed_time,&stime);
+		clock_nanosleep(CLOCK_MONOTONIC,TIMER_ABSTIME,stime,NULL);
+	} // Loop forever.
+	return 100; /* NOTREACHED */
+}