Backing up a controller team


NOTE: Because the scripts in this appendix cross page boundaries, be careful to avoid including the page number when copying a script. Copying a script one page at a time can prevent inclusion of page numbers.


#!/bin/bash 
#------------------------------------------------------------------------------- 
# Copyright 2013 Hewlett Packard Co., All Rights Reserved. 
#-------------------------------------------------------------------------------
#
# Backup a Team 
#-------------------------------------------------------------------------------
# Directory locations used by the backup.
BACKUP_DIR="/opt/sdn/backup"
BACKUP_TEAM_DIR="/opt/sdn/team_backup"
# Marker and log files live inside the team backup directory.
TEAM_BACKUP_STATUS_FILE="${BACKUP_TEAM_DIR}/teamBackup_status"
TEAM_BACKUP_LOGFILE="${BACKUP_TEAM_DIR}/teamBackup_log.log"
# Number of polls; this * 10 = seconds to wait for backup to finish.
BACKUP_WAIT_COUNT=200
# PID of the main script, so helper code can signal it.
B_PID=$$
export BACKUP_DIR BACKUP_TEAM_DIR TEAM_BACKUP_STATUS_FILE TEAM_BACKUP_LOGFILE
export BACKUP_WAIT_COUNT B_PID
# A TERM signal ends the script with a failure status.
trap 'exit 1' TERM
#==============================================================================
# F U N C T I O N S 
#==============================================================================
#------------------------------------------------------------------------------
# Function validateTeamLead ( )
# Validates that the team leader IP (global leaderIp) appears in the output
# of ifconfig on this host. If it does not, logs a message and calls
# exitBackup 1. The global leaderIp is only read, never modified.
#------------------------------------------------------------------------------
function validateTeamLead {
  # -F: treat the IP as a fixed string so '.' is not a regex wildcard.
  # -w: whole-word match so 10.0.0.1 does not match 10.0.0.10.
  if [ -z "$leaderIp" ] || ! ifconfig | grep -qwF -- "$leaderIp"; then
    teamBackup_log "Run this script from the team lead node."
    exitBackup 1
  fi
  teamBackup_log "Leader node IP $leaderIp is correctly configured."
}
#------------------------------------------------------------------------------
# Function validateTeamBackupStatus ( )
# Checks if a new backup can be started. If the status file holds the
# in-progress marker, logs a message and calls exitBackup 1. Otherwise the
# team backup directory is removed and recreated, and the in-progress marker
# is written to the status file.
#------------------------------------------------------------------------------
function validateTeamBackupStatus {
  local inProgressMarker="backup_in_progress=true"
  local currentStatus=""
  # Check if any backup is going on now.
  if [ -e "$TEAM_BACKUP_STATUS_FILE" ]; then
    teamBackup_log "Backup status file $TEAM_BACKUP_STATUS_FILE exists."
    currentStatus=$(cat -- "$TEAM_BACKUP_STATUS_FILE")
    if [ "$currentStatus" == "$inProgressMarker" ]; then
      teamBackup_log "Backup already in progress, aborting new backup..."
      # NOTE(review): exitBackup removes the status file, which also clears
      # the marker of the backup that is still running - confirm this is
      # intended.
      exitBackup 1
    fi
  fi
  # ':?' aborts instead of expanding to an empty path for rm -rf.
  rm -rf -- "${BACKUP_TEAM_DIR:?}"
  if ! mkdir -p -- "$BACKUP_TEAM_DIR"; then
    echo "Unable to create the team backup directory $BACKUP_TEAM_DIR."
    exitBackup 1
  fi
  # World-writable, as in the original script.
  chmod 777 "$BACKUP_TEAM_DIR"
  printf '%s\n' "$inProgressMarker" > "$TEAM_BACKUP_STATUS_FILE"
  teamBackup_log "No backup is currently in progress. A new backup can start."
}
#------------------------------------------------------------------------------
# Function backupNode ( <nodeIndex> )
# Starts a backup on the node at <nodeIndex> by POSTing to its backup URL.
# Reads the globals ipArr and nodeAuth, sets the global sessionID and echoes
# it. Calls exitBackup 1 if the POST fails or returns an empty response.
#------------------------------------------------------------------------------
function backupNode {
  local nodeIndex=$1
  local backupToken=${nodeAuth[$nodeIndex]}
  local backupIp=${ipArr[$nodeIndex]}
  local backupURL="https://$backupIp:8443/sdn/v2.0/backup"
  # post runs in this shell, so the errorCode and postRes it sets are visible.
  post "$backupIp" "$backupToken" "$backupURL"
  if [ "${errorCode:-0}" -ne 0 ]; then
    teamBackup_log "Failed to start backup for $backupIp."
    exitBackup 1
  fi
  # NOTE(review): nothing else in this file assigns sessionID, so the POST
  # response body (postRes) is used as the session identifier - confirm
  # against the controller's REST API.
  sessionID=$postRes
  if [ -z "$sessionID" ]; then
    teamBackup_log "Failed to start backup on $backupIp."
    exitBackup 1
  fi
  printf '%s\n' "$sessionID"
}
#------------------------------------------------------------------------------
# Function downloadBackupSet ( <nodeIndex> )
# Downloads the backup file of the node at <nodeIndex> into BACKUP_TEAM_DIR
# and verifies its SHA-256 checksum. The leader's file gets a ".Leader.zip"
# suffix. Calls exitBackup 1 if the download fails or the checksum differs.
#------------------------------------------------------------------------------
function downloadBackupSet {
  local nodeIndex=$1
  local backupAuth=${nodeAuth[$nodeIndex]}
  local backupIp=${ipArr[$nodeIndex]}
  local backupUrl="https://$backupIp:8443/sdn/v2.0/backup"
  local fileName=""
  local expected=""
  local actual=""
  if [ "$backupIp" == "$leaderIp" ]; then
    fileName="$BACKUP_TEAM_DIR/sdn_controller_backup_$backupIp.Leader.zip"
  else
    fileName="$BACKUP_TEAM_DIR/sdn_controller_backup_$backupIp.zip"
  fi
  # get runs in this shell (only its stdout is redirected), so the errorCode
  # it sets is visible here.
  get "$backupIp" "$backupAuth" "$backupUrl" > "$fileName"
  if [ "${errorCode:-0}" -ne 0 ]; then
    teamBackup_log "Failed to download backup from $backupIp."
    exitBackup 1
  fi
  # NOTE(review): the checksum URL is the relative path "v2.0/backup/checksum"
  # resolved against this node's /sdn base URL, and the response body is
  # assumed to be the bare hex digest - confirm against the REST API.
  expected=$(get "$backupIp" "$backupAuth" "$backupUrl/checksum")
  actual=$(sha256sum "$fileName" | cut -d ' ' -f1)
  if [ "$expected" != "$actual" ]; then
    teamBackup_log "Checksum failure: expected $expected but got $actual."
    exitBackup 1
  fi
  teamBackup_log "Successfully copied backup file from $backupIp."
}
#------------------------------------------------------------------------------
# Function verifyBackupStatus ( <nodeIndex> )
# Queries the backup status of the node at <nodeIndex> and stores the
# response in backupStatus[<nodeIndex>]. When the response is "SUCCESS", logs
# the completion and decrements the global backup_complete counter.
#------------------------------------------------------------------------------
function verifyBackupStatus {
  local nodeIndex=$1
  local backupIP=${ipArr[$nodeIndex]}
  local backupUrl="https://$backupIP:8443/sdn/v2.0/backup/status"
  # Quoted so an empty token cannot shift the URL into the token position.
  backupStatus[$nodeIndex]=$(get "$backupIP" "${nodeAuth[$nodeIndex]}" "$backupUrl")
  if [ "${backupStatus[$nodeIndex]}" == "SUCCESS" ]; then
    teamBackup_log "Backup completed successfully on $backupIP."
    # Plain assignment: unlike 'let', it does not return a non-zero status
    # when the counter reaches 0.
    backup_complete=$(( backup_complete - 1 ))
  fi
}
#------------------------------------------------------------------------------
# Function teamBackupZip ( )
# Creates a single zip for all the team backup data. Removes any earlier
# team zip and the status file, zips BACKUP_TEAM_DIR into a date-stamped
# archive (path kept in the global teamZip), then removes the per-node backup
# files. If zip fails, the per-node files are kept and exitBackup 1 is
# called.
#------------------------------------------------------------------------------
function teamBackupZip {
  # Date stamp with spaces and colons replaced by underscores.
  teamZip=$(date | tr ' :' '__')
  teamZip="$BACKUP_TEAM_DIR/sdn_team_backup_$teamZip.zip"
  # ':?' aborts instead of expanding to "/sdn_team_backup*" when unset.
  rm -rf -- "${BACKUP_TEAM_DIR:?}"/sdn_team_backup* "$TEAM_BACKUP_STATUS_FILE"
  if ! zip -r "$teamZip" "$BACKUP_TEAM_DIR/"; then
    # Keep the per-node backups: they are the only copy of the data.
    teamBackup_log "Failed to create the team backup zip $teamZip."
    exitBackup 1
  fi
  rm -rf -- "${BACKUP_TEAM_DIR:?}"/sdn_controller_backup*
}
#------------------------------------------------------------------------------
# Function remoteBackupFileCopy ( )
# Copies the team backup zip to the remote location in the global remotePath
# using scp. With an empty remotePath nothing is copied and a message is
# logged. Returns non-zero (after logging) when scp fails.
#------------------------------------------------------------------------------
function remoteBackupFileCopy {
  if [ -z "$remotePath" ]; then
    teamBackup_log "Team backup data was not copied to the remote location."
    return
  fi
  teamBackup_log "Copying team backup to the remote location $remotePath..."
  if ! scp "$BACKUP_TEAM_DIR"/sdn_team_backup* "$remotePath"; then
    # Report the failure instead of letting it pass unnoticed.
    teamBackup_log "Failed to copy the team backup to $remotePath."
    return 1
  fi
}
#------------------------------------------------------------------------------
# Function getSysInfo ( <authToken> )
# Gets the system information from the controller on localhost and stores
# the response in the global sysInfo. Makes up to five attempts, five
# seconds apart, while the response is empty. Calls exitBackup 1 if no
# response is obtained.
#------------------------------------------------------------------------------
function getSysInfo {
  local leadAuth=$1
  local sysUrl="https://localhost:8443/sdn/v2.0/systems"
  local attempt
  for attempt in 1 2 3 4 5; do
    sysInfo=$(get localhost "$leadAuth" "$sysUrl")
    # NOTE: get runs inside a command substitution here, so an errorCode it
    # assigns is not visible in this shell; this check sees whatever value
    # the calling shell already holds.
    if [ "${errorCode:-0}" -ne 0 ]; then
      teamBackup_log "Failed to retrieve the system information."
      exitBackup 1
    fi
    [ -n "$sysInfo" ] && break
    # No point in sleeping after the final attempt.
    [ "$attempt" -lt 5 ] && sleep 5
  done
  if [ -z "$sysInfo" ]; then
    teamBackup_log "Failed to retrieve the system information."
    exitBackup 1
  fi
}
#------------------------------------------------------------------------------
# Function extractRole_NodeIP ( <systemInfo> )
# Extracts IP and role for all the nodes in a team from the system
# information JSON. Sets the globals ipArr, roleArr and numNodes, and sets
# leaderIp to the IP of the first node whose role is "leader".
# All arguments are joined, so the JSON may also be passed unquoted.
#------------------------------------------------------------------------------
function extractRole_NodeIP {
  local sysinfo="$*"
  local fields
  local i
  # Drop quotes, closing braces and brackets; put every "key:value" on its
  # own line by breaking at ',' and '{'.
  fields=$(printf '%s\n' "$sysinfo" | tr -d '"}' | tr -d '[' | tr -d ']' \
    | tr ',{' '\n\n')
  # Word splitting is intentional: it trims whitespace around each value.
  # shellcheck disable=SC2207
  ipArr=($(printf '%s\n' "$fields" | grep -w "ip" | cut -d ':' -f2-))
  # shellcheck disable=SC2207
  roleArr=($(printf '%s\n' "$fields" | grep -w "role" | cut -d ':' -f2-))
  numNodes=${#ipArr[@]}
  teamBackup_log "Number of nodes in the team is $numNodes."
  for (( i = 0; i < numNodes; i++ )); do
    if [ "${roleArr[$i]}" == "leader" ]; then
      leaderIp=${ipArr[$i]}
      teamBackup_log "The team leader is $leaderIp."
      break
    fi
  done
}
#------------------------------------------------------------------------------
# Function teamBackup_log ( <message> )
# Writes <message> to standard output and appends it to the log file named
# by TEAM_BACKUP_LOGFILE. With no log file configured, the message is only
# written to standard output.
#------------------------------------------------------------------------------
function teamBackup_log {
  local msg="$1"
  if [ -n "$TEAM_BACKUP_LOGFILE" ]; then
    # printf, not echo, so a message such as "-n" is printed verbatim.
    printf '%s\n' "$msg" | tee -a "$TEAM_BACKUP_LOGFILE"
  else
    printf '%s\n' "$msg"
  fi
}
#------------------------------------------------------------------------------
# Function exitBackup ( <exitStatus> )
# Exits the backup. Logs a message when <exitStatus> is non-zero, removes
# the status file and exits with <exitStatus>. When a failure is raised from
# a subshell (for example inside a command substitution), the main script
# (B_PID) is also sent TERM so that its TERM trap ends the whole run.
#------------------------------------------------------------------------------
function exitBackup {
  local exitStatus=${1:-0}
  if [ "$exitStatus" -ne 0 ]; then
    teamBackup_log "Stopping backup/restore with errors."
  fi
  rm -f -- "$TEAM_BACKUP_STATUS_FILE"
  # Signal the main script only for a failure raised from a subshell.
  # Signalling unconditionally would run the TERM trap on a successful
  # exit too and turn exit status 0 into 1.
  if [ "$exitStatus" -ne 0 ] && [ -n "$B_PID" ] && [ "$BASHPID" != "$B_PID" ]; then
    kill -s TERM "$B_PID"
  fi
  exit "$exitStatus"
}
#------------------------------------------------------------------------------
# Function get ( <ipAddr> <authToken> <url> )
# Performs a GET with curl and writes the response body to standard output.
# The request is repeated, up to five attempts in total, while curl exits
# with code 35 (SSL connect error). The last curl exit code is stored in the
# global errorCode and is also the function's return status.
#------------------------------------------------------------------------------
function get {
  local getIP=$1
  local getToken=$2
  local getUrl=$3
  local attempts=0
  while [ "$attempts" -lt 5 ]; do
    curl --noproxy "$getIP" --header "X-Auth-Token:$getToken" \
      --fail -ksS -L --request GET --url "$getUrl"
    errorCode=$?
    attempts=$(( attempts + 1 ))
    if [ "$errorCode" -ne 35 ]; then
      break
    fi
    # Log via stderr: stdout carries the response body, which callers
    # capture or redirect into a file.
    teamBackup_log "SSL error on GET of $getUrl, retrying..." >&2
  done
  return "$errorCode"
}
#------------------------------------------------------------------------------
# Function post ( <ipAddr> <authToken> <url> <data> )
# Performs a POST of the specified data with curl. The response body is
# stored in the global postRes and written to standard output. The request
# is repeated, up to five attempts in total, while curl exits with code 35
# (SSL connect error). The last curl exit code is stored in the global
# errorCode and is also the function's return status.
#------------------------------------------------------------------------------
function post {
  local postIP=$1
  local postToken=$2
  local postUrl=$3
  local postData=$4
  local attempts=0
  while [ "$attempts" -lt 5 ]; do
    postRes=$(curl --noproxy "$postIP" --header "X-Auth-Token:$postToken" \
      --fail -ksS --request POST --url "$postUrl" --data-binary "$postData")
    errorCode=$?
    attempts=$(( attempts + 1 ))
    if [ "$errorCode" -ne 35 ]; then
      break
    fi
    # Log via stderr so the message cannot mix into the response on stdout.
    teamBackup_log "SSL error on POST to $postUrl, retrying..." >&2
  done
  # Quoted so the response is not word-split or glob-expanded.
  printf '%s\n' "$postRes"
  return "$errorCode"
}
#------------------------------------------------------------------------------
# Function put ( <ipAddr> <authToken> <url> <data> )
# Performs a PUT of the specified data with curl. The response body is
# stored in the global putRes and written to standard output. The request is
# repeated, up to five attempts in total, while curl exits with code 35
# (SSL connect error). The last curl exit code is stored in the global
# errorCode and is also the function's return status.
#------------------------------------------------------------------------------
function put {
  local putIP=$1
  local putToken=$2
  local putUrl=$3
  local putData=$4
  local attempts=0
  while [ "$attempts" -lt 5 ]; do
    # The backslash must be the last character on the line; a trailing
    # space after it would end the curl command before its options.
    putRes=$(curl --noproxy "$putIP" --header "X-Auth-Token:$putToken" \
      --fail -ksS -L --request PUT --url "$putUrl" --data-binary "$putData")
    errorCode=$?
    attempts=$(( attempts + 1 ))
    if [ "$errorCode" -ne 35 ]; then
      break
    fi
    # Log via stderr so the message cannot mix into the response on stdout.
    teamBackup_log "SSL error on PUT to $putUrl, retrying..." >&2
  done
  # Quoted so the response is not word-split or glob-expanded.
  printf '%s\n' "$putRes"
  return "$errorCode"
}
#------------------------------------------------------------------------------
# Function extractJSONString ( <json> <fieldName> )
# Extracts the JSON value corresponding to the field name and echoes it.
# This is a text-based extraction, not a JSON parser: quotes are removed,
# the text is split at ',' and '{', and the part after the first ':' of
# every line containing <fieldName> as a word is returned. Several matches
# are joined by single spaces.
#------------------------------------------------------------------------------
function extractJSONString {
  local json=$1
  local field=$2
  local value
  local -a words=()
  # The trailing sed drops closing braces/brackets left on a value that was
  # the last member of its object.
  value=$(printf '%s\n' "$json" | tr -d '"' | tr ',{' '\n\n' \
    | grep -w -- "$field" | cut -d ':' -f2- \
    | sed -e 's/[]}[:space:]]*$//')
  # Split on whitespace without glob expansion, then join with spaces.
  read -r -d '' -a words <<< "$value"
  printf '%s\n' "${words[*]}"
}
#------------------------------------------------------------------------------
# Function jsonEscapeString ( <text> )
# Helper for getAuthToken: echoes <text> with backslashes and double quotes
# escaped so it can be embedded in a JSON string literal.
#------------------------------------------------------------------------------
function jsonEscapeString {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}
#------------------------------------------------------------------------------
# Function getAuthToken ( <ipAddr> )
# Logs in to the controller at <ipAddr> with the globals domain, user and
# pass, and echoes the authentication token from the response (also stored
# in the global authToken). Calls exitBackup 1 if the login request fails,
# or if no token was found and restore_mode is not 1.
#------------------------------------------------------------------------------
function getAuthToken {
  local nodeIP=$1
  local url="https://$nodeIP:8443/sdn/v2.0/auth"
  local login
  local auth
  # Escape the credentials so a '"' or '\' in them keeps the JSON valid.
  login=$(printf '{"login":{"domain":"%s","user":"%s","password":"%s"}}' \
    "$(jsonEscapeString "$domain")" \
    "$(jsonEscapeString "$user")" \
    "$(jsonEscapeString "$pass")")
  # Attempt to authenticate and extract token if successful.
  auth=$(curl --noproxy "$nodeIP" -X POST --fail -ksSL --url "$url" \
    -H "Content-Type: application/json" --data-binary "$login" 2>&1)
  if [ $? -ne 0 ]; then
    teamBackup_log "Unable to authenticate as user $user in $domain domain."
    exitBackup 1
  fi
  authToken=$(extractJSONString "$auth" "token" | sed '/^$/d')
  # An unset restore_mode is treated as 0.
  if [ "${restore_mode:-0}" -ne 1 ] && [ -z "$authToken" ]; then
    teamBackup_log "Failed to get the authentication token."
    exitBackup 1
  fi
  printf '%s\n' "$authToken"
}
#==============================================================================
# M A I N
#==============================================================================
# This script only performs backups; getAuthToken reads this flag.
restore_mode=0
# Check for zip package.
if ! command -v zip > /dev/null 2>&1; then
  echo "The zip package must be installed to use this script."
  exit 1
fi
# Check the user specified script parameters.
if [ $# -lt 2 ]; then
  echo "Usage : backupTeam <user> <domain> [<user@ip:path>]"
  echo " <user> - user name to access the controller"
  echo " <domain> - domain of the controller"
  echo " [<user@ip:path>] - remote location to store backup file"
  echo " user - the login name for the system"
  echo " ip - the ip address of the system"
  echo " path - where to copy the file to on the remote system"
  exit 1
fi
# Refuse to start while another backup is marked as in progress; otherwise
# this recreates the team backup directory and writes the marker.
validateTeamBackupStatus
user="$1"
echo -n "Enter Controller Password: "
# IFS= and -r keep leading/trailing blanks and backslashes in the password.
IFS= read -r -s pass
echo
domain="$2"
remotePath=$3
errorCode=0
# Get the authentication token for the local controller.
leaderAuth=$(getAuthToken localhost)
# Get the system Information for the local controller.
getSysInfo "$leaderAuth"
# Get the set of team IPs and their associated team roles.
# Quoted so the whole JSON document arrives as one argument.
extractRole_NodeIP "$sysInfo"
# Runs in a subshell; on failure exitBackup signals this script via B_PID.
(validateTeamLead)
# Initiate a backup on each node.
for (( i=0; i<numNodes; i++ )); do
  nodeAuth[$i]=$(getAuthToken "${ipArr[$i]}")
  uuidURL="https://${ipArr[$i]}:8443/sdn/v2.0/systems"
  nodeUUID[$i]=$(get "${ipArr[$i]}" "${nodeAuth[$i]}" "$uuidURL?ip=${ipArr[$i]}")
  nodeUUID[$i]=$(extractJSONString "${nodeUUID[$i]}" "uid" | sed '/^$/d')
  if [ "${ipArr[$i]}" == "$leaderIp" ]; then
    # Skip the leader backup, since it will be done last.
    leaderIndex=$i
    continue
  fi
  backupNode "$i"
  teamBackup_log "Started backup on ${ipArr[$i]}."
done
# Verify the status of the backup on each node.
# The counter starts at the node count and drops by one per finished member;
# it stays at 1 for the leader, which is backed up afterwards.
backup_complete=$numNodes
waitTime=$(( BACKUP_WAIT_COUNT * 10 / 60 ))
for (( k=0; k<BACKUP_WAIT_COUNT; k++ )); do
  if [ "$backup_complete" -le 1 ]; then
    teamBackup_log "Backup on all member nodes completed successfully."
    break
  fi
  sleep 10
  for (( i=0; i<numNodes; i++ )); do
    # Skip the leader node check, since it will be done last.
    [ "${ipArr[$i]}" == "$leaderIp" ] && continue
    # Backup already completed for this node, so continue.
    [ "${backupStatus[$i]}" == "SUCCESS" ] && continue
    verifyBackupStatus "$i"
  done
done
if [ "$backup_complete" -gt 1 ]; then
  teamBackup_log "Backup of all member nodes took longer than $waitTime min. Aborting backup..."
  teamBackup_log "To increase backup wait time, change BACKUP_WAIT_COUNT in the script."
  exitBackup 1
fi
# Last, backup the leader node to avoid synchronization issues on a restore.
backupNode "$leaderIndex"
teamBackup_log "Started backup on leader ${ipArr[$leaderIndex]}."
backup_complete=1
# Verify the backup on the leader node.
for (( k=0; k<BACKUP_WAIT_COUNT; k++ )); do
  sleep 10
  verifyBackupStatus "$leaderIndex"
  if [ "$backup_complete" -le 0 ]; then
    teamBackup_log "Backup on the leader node completed successfully."
    break
  fi
done
if [ "$backup_complete" -gt 0 ]; then
  teamBackup_log "Backup of the leader node took longer than $waitTime min. Aborting backup..."
  teamBackup_log "To increase backup wait time, change BACKUP_WAIT_COUNT in the script."
  exitBackup 1
fi
# Copy all the backup files from each node in the team onto the leader node.
for (( i=0; i<numNodes; i++ )); do
  downloadBackupSet "$i"
done
# Create one zip for entire team and copy it to the specified remote location.
teamBackupZip
remoteBackupFileCopy
echo
teamBackup_log "The team was backed up successfully."
exitBackup 0