Redirect stderr when calling shutdown, too
[trigger-rcmd.git] / scripts / trigger-rcmd
1 #!/bin/sh
2 #
3 # trigger-rcmd: Trigger remote commands
4 # Copyright (c)2014-2017 Alexander Barton (alex@barton.de)
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 # Please read the file COPYING, README and AUTHORS for more information.
11 #
12
# Name of this script without its directory part.
BASENAME=$(basename "$0")
# Prefix of all log messages; starts out as the plain script name.
NAME=$BASENAME
# Directory holding the job/configuration files.
RCMD_D=/usr/local/etc/rcmd.d

# Append the system administration directories to the search path.
PATH="${PATH}:/usr/local/sbin:/usr/sbin"
18
# do_help: Write the command line synopsis to stderr and terminate the
# script with exit code 2.
do_help() {
        printf 'Usage: %s [--no-wake|-n] <rcmd> [<rcmd> [...]]\n' "$0" >&2
        exit 2
}
23
# do_rcmd: Run the job described by the configuration file "$cnf".
#
# The job file is sourced and may override the defaults assigned at the top
# of this function (SYSTEM, IFACE, CMD, TIMEOUT, SSH_CHECKS, WAKE_DELAY,
# BOOT_DELAY, SHUTDOWN_TIME). When the target does not answer a ping it is
# woken up with etherwake (unless NO_WAKE is set), then CMD is run on it
# through ssh. A target that had to be woken up is powered off again
# afterwards, provided the remote "w" lists no logged-in users.
#
# Globals read:    cnf, NAME, NO_WAKE
# Globals written: the job settings listed above, i, r, rsys_was_running,
#                  rsys_users (plain sh, so there are no local variables)
# Arguments:       $1 - job name; not used by this function itself
# Output:          progress messages on stdout, failure messages on stderr
#
# Return codes:
#   0 - remote command succeeded
#   1 - remote command ended with a non-zero exit code
#   2 - sourcing the job file failed
#   3 - etherwake failed
#   4 - target still did not answer after TIMEOUT ping attempts
#   5 - powering off the target failed
#   9 - target is down and NO_WAKE is set; job skipped
do_rcmd() {
        # Defaults; each of them can be overridden by the job file.
        SYSTEM="localhost"
        IFACE="eth0"
        CMD="uptime"
        TIMEOUT="300"
        SSH_CHECKS="6"
        WAKE_DELAY="10s"
        BOOT_DELAY="1m"
        SHUTDOWN_TIME="+10"

        # Read in job/configuration file, but ignore shellcheck(1) warnings:
        # shellcheck disable=1090
        if ! . "$cnf"; then
                echo "$NAME: Failed to read \"$cnf\"!" >&2
                return 2
        fi

        echo "$NAME: Checking system \"$SYSTEM\" ..."
        if fping -c1 -q "$SYSTEM" 2>/dev/null; then
                echo "$NAME: \"$SYSTEM\" is already alive, ok."
                rsys_was_running=1
        else
                if [ -n "$NO_WAKE" ]; then
                        echo "$NAME: \"$SYSTEM\" seems to be down, skipping job."
                        return 9
                fi

                echo "$NAME: \"$SYSTEM\" seems to be down, wake it up ..."
                # etherwake is given the short host name, i.e. everything in
                # front of the first dot of SYSTEM.
                if ! etherwake -b -i "$IFACE" "${SYSTEM%%.*}"; then
                        echo "$NAME: Failed to wake \"$SYSTEM\"!" >&2
                        return 3
                fi

                echo "$NAME: Waiting for \"$SYSTEM\" to respond ..."
                # One ping per iteration, at most TIMEOUT iterations.
                for i in $(seq 1 "$TIMEOUT"); do
                        fping -c1 -q "$SYSTEM" 2>/dev/null && break
                        sleep 1s
                done
                # The loop alone does not tell success from "gave up", so
                # probe one last time.
                if ! fping -c1 -q "$SYSTEM" 2>/dev/null; then
                        echo "$NAME: Failed to wake \"$SYSTEM\"!" >&2
                        return 4
                fi
                sleep "$WAKE_DELAY"
                rsys_was_running=0
        fi

        # Wait for SSH to come up. Running out of attempts is not treated as
        # an error here; the remote command below is tried regardless.
        for i in $(seq 1 "$SSH_CHECKS"); do
                echo "$NAME: Checking SSH connection [publickey] ($i/$SSH_CHECKS) ..."
                if ssh -o PreferredAuthentications=publickey -q "$SYSTEM" true >/dev/null 2>&1; then
                        echo "$NAME: Ok, SSH seems to be available on \"$SYSTEM\"."
                        break
                fi
                echo "$NAME: SSH on \"$SYSTEM\" not ready. Waiting ..."
                sleep 10s
        done

        if [ "$rsys_was_running" -eq 0 ]; then
                echo "$NAME: \"$SYSTEM\" just started up. Delaying actions for $BOOT_DELAY ..."
                sleep "$BOOT_DELAY"
        fi

        echo "$NAME: Calling \"$CMD\" on \"$SYSTEM\" ..."
        echo

        # stdin comes from /dev/null so the remote command cannot read from
        # the caller's stdin.
        # shellcheck disable=2029
        ssh -o PreferredAuthentications=publickey -q "$SYSTEM" "$CMD" </dev/null ; r=$?

        echo
        echo "$NAME: Remote command ended with return code $r."
        sleep 2

        if [ "$rsys_was_running" -eq 0 ]; then
                # Count the lines of the remote "w" output after its first two
                # lines; that count is taken as the number of logged-in users.
                rsys_users=$(ssh -q "$SYSTEM" w | tail -n +3 | wc -l)
                if [ "$rsys_users" -eq 0 ]; then
                        echo "$NAME: Power off \"$SYSTEM\" again ..."
                        # Discard stdout AND stderr of the remote shutdown.
                        # Order matters: stdout must be redirected first,
                        # otherwise stderr ends up on the original stdout.
                        # shellcheck disable=2029
                        if ! ssh -o PreferredAuthentications=publickey -q "$SYSTEM" "sync; shutdown -hP $SHUTDOWN_TIME" >/dev/null 2>&1; then
                                echo "$NAME: Failed to power off \"$SYSTEM\"!" >&2
                                return 5
                        fi
                elif [ "$rsys_users" -eq 1 ]; then
                        printf '%s: Not shutting down "%s", There is 1 user logged in!\n' \
                                "$NAME" "$SYSTEM"
                else
                        printf '%s: Not shutting down "%s", There are %s users logged in!\n' \
                                "$NAME" "$SYSTEM" "$rsys_users"
                fi
        else
                echo "$NAME: \"$SYSTEM\" was already alive, not shutting down."
        fi

        # Collapse the remote exit code to 0/1 so that it cannot collide with
        # the function's own error codes.
        if [ "$r" -ne 0 ]; then
                return 1
        fi
        return 0
}
121
# Defaults: hosts that are down get woken up unless --no-wake/-n is given.
NO_WAKE=

# Abort with exit code 3 as soon as one of the required external tools
# cannot be found.
for tool in fping ssh etherwake lockfile-create; do
        command -v "$tool" >/dev/null 2>&1 && continue
        echo "$NAME: Required tool \"$tool\" missing, aborting!"
        exit 3
done
132
# Consume leading options. The first word that does not start with "-" ends
# option parsing and begins the list of jobs.
until [ $# -eq 0 ]; do
        case "$1" in
          -n|--no-wake)
                NO_WAKE=1
                ;;
          --help|-*)
                # Explicit help request or unknown option: show usage, exit.
                do_help
                ;;
          *)
                break
                ;;
        esac
        shift
done

# At least one job name is required.
if [ $# -eq 0 ]; then
        do_help
fi
146
# Run every job named on the command line, one after another. "result"
# becomes 1 as soon as one job counts as failed.
result=0
for job in "$@"; do
        cnf="$RCMD_D/$job"
        # Assume failure; only a job that actually ran replaces this value.
        r=1
        if [ -r "$cnf" ]; then
                stamp=$(date "+%Y-%m-%d %H:%M:%S")
                echo "$NAME: Working on \"$job\" - $stamp"
                # While the job runs, log messages carry the job name.
                NAME="$BASENAME($job)"
                lck="/run/lock/$NAME-$job.lock"
                if lockfile-create --quiet --use-pid --retry 2 --lock-name "$lck"; then
                        do_rcmd "$job" ; r=$?
                        stamp=$(date "+%Y-%m-%d %H:%M:%S")
                        echo "$NAME: Done ($r) - $stamp"
                        rm -f "$lck"
                else
                        echo "$NAME: Failed to acquire lock file, skipping job! ($lck)"
                fi
                NAME=$BASENAME
        else
                echo "$NAME: Can't read \"$cnf\"!"
        fi

        if [ "$r" -ne 0 ]; then
                # Exit code indicates an error. "System down" (9) is the one
                # code that is tolerated, and only when waking up was
                # disabled with --no-wake.
                if [ -z "$NO_WAKE" ] || [ "$r" -ne 9 ]; then
                        result=1
                fi
        fi
done

stamp=$(date "+%Y-%m-%d %H:%M:%S")
echo "$NAME: All done, exit code $result - $stamp"
exit $result