-
Notifications
You must be signed in to change notification settings - Fork 0
/
sshd.sh
executable file
·187 lines (148 loc) · 4.99 KB
/
sshd.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/bin/sh
##**************************************************************
##
## Copyright (C) 1990-2017, Condor Team, Computer Sciences Department,
## University of Wisconsin-Madison, WI.
##
## Licensed under the Apache License, Version 2.0 (the "License"); you
## may not use this file except in compliance with the License. You may
## obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
##**************************************************************
# Remove the files this script creates under the job scratch directory:
# the DSA/RSA host key pairs, the identity key pair, the captured sshd
# output and the fetched copy of the contact file.
# Globals (read): hostkey, idkey, _CONDOR_SCRATCH_DIR
# Files that do not exist are silently ignored (rm -f).
sshd_cleanup() {
  # Every path is quoted so that a scratch directory containing whitespace
  # or glob characters is passed to rm as one operand instead of being
  # split into several wrong ones.
  # ${idkey:+...} drops the operand entirely while idkey is still empty
  # (the TERM handler can fire before the key path has been assigned),
  # which matches what the unquoted expansion used to do.
  rm -f -- \
    "${hostkey}.dsa" "${hostkey}.rsa" \
    "${hostkey}.dsa.pub" "${hostkey}.rsa.pub" \
    ${idkey:+"$idkey"} "${idkey}.pub" \
    "$_CONDOR_SCRATCH_DIR/tmp/sshd.out" \
    "$_CONDOR_SCRATCH_DIR/contact"
}
# Run sshd_cleanup when this shell receives SIGTERM.  POSIX specifies the
# signal name for trap without the "SIG" prefix, and some /bin/sh
# implementations reject "SIGTERM", so the short form is used.
trap sshd_cleanup TERM

# Look up the helper programs in the Condor configuration.
# Note that sshd requires its full path.
SSHD=$(condor_config_val CONDOR_SSHD)
KEYGEN=$(condor_config_val CONDOR_SSH_KEYGEN)
# condor_chirp is taken from the configured libexec directory.
CONDOR_CHIRP=$(condor_config_val libexec)/condor_chirp

# Both sshd and the key generator are needed; give up if either lookup
# came back empty.  Two separate tests replace the obsolescent "-o"
# operator of [.
if [ -z "$SSHD" ] || [ -z "$KEYGEN" ]; then
  echo CONDOR_SSHD and/or CONDOR_SSH_KEYGEN are not configured, exiting
  exit 255
fi

# First port to try when starting sshd.
PORT=4444

# Self-assignment: leaves the value exactly as inherited.
_CONDOR_REMOTE_SPOOL_DIR=$_CONDOR_REMOTE_SPOOL_DIR
# Positional arguments: $1 is this node's process number (0 is treated as
# the head node further down), $2 is the number of processes.
_CONDOR_PROCNO=$1
_CONDOR_NPROCS=$2
# Make a tmp directory under the job scratch directory to store keys and
# other files that won't get transferred back.
# The path is quoted so that a scratch directory containing whitespace is
# tested and created as a single path.
if [ ! -d "$_CONDOR_SCRATCH_DIR/tmp" ]; then
  mkdir "$_CONDOR_SCRATCH_DIR/tmp"
fi
# Create the host keys: one DSA and one RSA key pair with an empty
# passphrase, stored under the scratch tmp directory.
# NOTE(review): newer OpenSSH releases have dropped DSA support; if the
# configured keygen rejects "-t dsa" this loop exits 255 - confirm against
# the deployed OpenSSH version.
hostkey=$_CONDOR_SCRATCH_DIR/tmp/hostkey
for keytype in dsa rsa; do
  # Remove any key pair left at this path first - presumably so the key
  # generator is not asked to overwrite an existing file.
  # Path operands are quoted so whitespace in the scratch directory does
  # not split them.
  rm -f -- "${hostkey}.${keytype}" "${hostkey}.${keytype}.pub"
  # $KEYGEN is left unquoted in command position, as in the original.
  $KEYGEN -q -f "${hostkey}.${keytype}" -t "$keytype" -N ''
  _TEST=$?
  if [ "$_TEST" -ne 0 ]; then
    echo ssh keygenerator $KEYGEN returned error $_TEST exiting
    exit 255
  fi
done
# Create the identity key: an RSA key pair with an empty passphrase, named
# after this node's process number.
idkey=$_CONDOR_SCRATCH_DIR/tmp/$_CONDOR_PROCNO.key
# Path operands are quoted so whitespace in the scratch directory does not
# split them; $KEYGEN stays unquoted in command position, as before.
$KEYGEN -q -f "$idkey" -t rsa -N ''
_TEST=$?
if [ "$_TEST" -ne 0 ]; then
  echo ssh keygenerator $KEYGEN returned error $_TEST exiting
  exit 255
fi

# Send the identity key back home: store it in the remote spool directory
# as <procno>.key, where the head node fetches it from later.
$CONDOR_CHIRP put -perm 0700 "$idkey" "$_CONDOR_REMOTE_SPOOL_DIR/$_CONDOR_PROCNO.key"
_TEST=$?
if [ "$_TEST" -ne 0 ]; then
  echo error $_TEST chirp putting identity keys back
  exit 255
fi
# Start up sshd, probing upward from $PORT until an instance reports that
# it is listening.  ssh needs full paths to all of its arguments.
# On success PORT holds the chosen port and pid the background sshd's pid.
done=0
while [ "$done" -eq 0 ]; do
  # Port numbers end at 65535.  Without this bound a sshd that can never
  # start would keep the loop running forever with ever larger PORT values.
  if [ "$PORT" -gt 65535 ]; then
    echo unable to start $SSHD on any port up to 65535, exiting
    exit 255
  fi

  # Try to launch sshd on this port.  It runs in the background with its
  # stdout/stderr captured in sshd.out so the startup message can be
  # checked below.  Arguments that embed a path are quoted so whitespace in
  # the scratch directory does not split them.
  $SSHD "-p$PORT" \
    "-oAuthorizedKeysFile=${idkey}.pub" \
    "-oHostKey=${hostkey}.dsa" \
    "-oHostKey=${hostkey}.rsa" \
    -De -f/dev/null -oStrictModes=no -oPidFile=/dev/null -oAcceptEnv=_CONDOR \
    < /dev/null > "$_CONDOR_SCRATCH_DIR/tmp/sshd.out" 2>&1 &
  pid=$!

  # Give sshd some time
  sleep 2
  if grep "Server listening" "$_CONDOR_SCRATCH_DIR/tmp/sshd.out" > /dev/null 2>&1; then
    done=1
  else
    # it is probably dead now
    #kill -9 $pid > /dev/null 2>&1
    PORT=$((PORT + 1))
  fi
done

# Don't need this anymore
rm "$_CONDOR_SCRATCH_DIR/tmp/sshd.out"
# Create this node's contact record and add it to the contact file in the
# remote spool directory.  The record is one line:
#   <procno> <address> <sshd port> <user> <working dir> <EnteredCurrentStatus>
hostname=$(hostname -i)
currentDir=$(pwd)
user=$(whoami)
# The job's EnteredCurrentStatus attribute tags the record; the head node
# greps for this value when counting how many nodes have reported in.
thisrun=$($CONDOR_CHIRP get_job_attr EnteredCurrentStatus)

# The record is fed to chirp on stdin ("-").  "-mode cwa" presumably means
# create/write/append so that every node adds its own line - confirm
# against the condor_chirp documentation.  The spool path is quoted so
# whitespace in it does not split the argument.
echo "$_CONDOR_PROCNO $hostname $PORT $user $currentDir $thisrun" |
  $CONDOR_CHIRP put -mode cwa - "$_CONDOR_REMOTE_SPOOL_DIR/contact"
_TEST=$?
if [ "$_TEST" -ne 0 ]; then
  echo error $_TEST chirp putting contact info back to submit machine
  exit 255
fi
# On the head node (process number 0), wait for the contact file to list
# every node and then fetch all of the identity keys.
if [ "$_CONDOR_PROCNO" -eq 0 ]; then
  done=0
  count=0

  # Need to poll the contact file until all nodes have reported in
  while [ "$done" -eq 0 ]; do
    # Drop the previously fetched copy so that a failed fetch cannot leave
    # stale data to be counted.  This removes the same path the fetch
    # writes to, rather than a "contact" relative to the current directory.
    rm -f -- "$_CONDOR_SCRATCH_DIR/contact"
    $CONDOR_CHIRP fetch "$_CONDOR_REMOTE_SPOOL_DIR/contact" "$_CONDOR_SCRATCH_DIR/contact"

    # Count the records that carry this run's tag.  The pattern is quoted
    # so that an empty tag cannot turn the file name into the pattern and
    # leave grep reading stdin.
    lines=$(grep -c -- "$thisrun" "$_CONDOR_SCRATCH_DIR/contact")

    # lines is empty when grep could not read the file; treat that as
    # "not ready yet" instead of handing [ a malformed expression.
    if [ -n "$lines" ] && [ "$lines" -eq "$_CONDOR_NPROCS" ]; then
      done=1

      # Fetch every node's identity key into the scratch tmp directory.
      node=0
      while [ "$node" -ne "$_CONDOR_NPROCS" ]; do
        $CONDOR_CHIRP fetch "$_CONDOR_REMOTE_SPOOL_DIR/$node.key" "$_CONDOR_SCRATCH_DIR/tmp/$node.key"
        # Now that we've got it, the submit side doesn't need it anymore
        $CONDOR_CHIRP remove "$_CONDOR_REMOTE_SPOOL_DIR/$node.key"
        node=$((node + 1))
      done
      # Only the directory part is quoted; the *.key glob must still expand.
      chmod 0700 "$_CONDOR_SCRATCH_DIR"/tmp/*.key

      # Erase the contact file from the spool directory, in case
      # this job is held and rescheduled
      $CONDOR_CHIRP remove "$_CONDOR_REMOTE_SPOOL_DIR/contact"
    else
      # Wait a second before polling again
      sleep 1

      # Timeout after polling 1200 times (about 20 minutes).  The check
      # lives on the not-ready path only, so a poll that succeeds is never
      # turned into a failure by the counter.
      count=$((count + 1))
      if [ "$count" -eq 1200 ]; then
        echo timed out waiting for all nodes to report in, exiting
        exit 1
      fi
    fi
  done
fi
# This file is sourced by the MPI startup scripts, so they can
# wait and call sshd_cleanup themselves as needed
#wait
#sshd_cleanup