#!/usr/local/bin/bash
# spincheck.sh version 2017-01-18.
# Run as superuser. See notes at end.

# Creates the logfile and sends all stdout and stderr both to the terminal
# and to the log.  NOTE: as written, tee truncates the log each time the
# script starts, so the previous contents are overwritten.  If you want to
# append to the existing log instead, add '-a' to the tee command.
# ('-i' tells tee to ignore interrupt signals.)
LOG=/mnt/Ark/Jim/spincheck.log
exec > >(tee -i "$LOG") 2>&1

SP=33.57	#  Setpoint mean temperature, for information only

##############################################
# function get_disk_name
# Get disk name from current LINE of DEVLIST
# Globals:  LINE (read), DEVID (written)
##############################################
# Only the LAST parenthesised group of the line is examined, i.e. the text
# between the final '(' and the ')' that follows it.  That group holds two
# comma-separated names.  Looking only at the trailing group means a '(' or
# ',' inside the device description cannot confuse the parsing.

# camcontrol output for disks on HBA seems to change every version,
# so need 2 options to get ada/da disk name.
function get_disk_name {
		local pair

		# No "(...)" group at all: there is no disk name on this line.
		if [[ $LINE != *\(*\)* ]] ; then
			DEVID=""
			return
		fi

		pair=${LINE##*\(}       # drop everything up to and including the last '('
		pair=${pair%%\)*}       # drop the closing ')' and anything after it

		case $pair in
			pass*,*) DEVID=${pair#*,}  ;;   # for (pass#,[a]da#)
			*,*)     DEVID=${pair%%,*} ;;   # for ([a]da#,pass#)
			pass*)   DEVID=""          ;;   # pass device only: no disk name
			*)       DEVID=$pair       ;;   # single name without a comma
		esac
}

############################################################
# function print_header
# Called when script starts and each quarter day
# Prints a blank line, the date, then one column title per
# drive in DEVLIST followed by the titles of the data columns.
# No trailing newline is printed.
# Globals:  DEVLIST (read); DATE, LINE, DEVID (written)
############################################################
function print_header {
   DATE=$(date +"%A, %b %d")
   printf "\n%s \n" "$DATE"
   printf "%10s" ""                # ten blanks where the time column goes
   # -r and an empty IFS keep each DEVLIST line exactly as it is
   while IFS= read -r LINE ; do
      get_disk_name
      # Quoted so an unusual name is never split into several arguments
      printf "%-5s" "$DEVID"
   done <<< "$DEVLIST"             # while statement works on DEVLIST
   printf "%4s %5s %5s %3s %5s %5s %5s %5s %5s %5s %5s %-7s" \
      "Tmax" "Tmean" "ERRc" "CPU" "FAN1" "FAN2" "FAN3" "FAN4" "FANA" \
      "Fan%0" "Fan%1" "MODE"
}

#################################################
# function hex_reply_to_dec  (helper for manage_data)
# Convert the first hex byte of an ipmitool raw reply to decimal.
# Arguments: $1 - reply text (hex, may have leading space)
#            $2 - value to print when $1 holds no valid hex
# Outputs:   the decimal value (or $2) on stdout
#################################################
function hex_reply_to_dec {
   local byte rest
   read -r byte rest <<< "$1"      # first whitespace-separated token
   if [[ $byte =~ ^[0-9A-Fa-f]+$ ]] ; then
      echo $(( 16#$byte ))
   else
      echo "$2"
   fi
}

#################################################
# function manage_data: Read, process, print data
# Globals read:    Tsum, i, Tmax, SP, IPMITOOL
# Globals written: Tmean, ERRc, DUTY0, DUTY1, MODE, MODEt, SDR,
#                  RPM_FAN1..RPM_FAN4, RPM_FANA
# Outputs:         "^<Tmax> <Tmean>" on stdout, no trailing newline
#################################################
function manage_data {
   local fan

   # With no spinning drive (i is 0) there is nothing to average and bc
   # would stop with "Divide by zero", so report a mean of 0 instead.
   if (( i > 0 )) ; then
      Tmean=$(echo "scale=3; $Tsum / $i" | bc)
   else
      Tmean=0
   fi
   ERRc=$(echo "scale=2; $Tmean - $SP" | bc)

   # Read duty cycle, convert to decimal.
   # May need to disable these 2 lines as some boards apparently return
   # incorrect data. In that case just assume $DUTY hasn't changed.
   # If the reply is empty or not hex, the previous value (or 0) is kept.
   DUTY0=$(hex_reply_to_dec "$("$IPMITOOL" raw 0x30 0x70 0x66 0 0)" "${DUTY0:-0}")
   DUTY1=$(hex_reply_to_dec "$("$IPMITOOL" raw 0x30 0x70 0x66 0 1)" "${DUTY1:-0}")

   # Read fan mode, convert to decimal; -1 marks an unusable reply.
   MODE=$(hex_reply_to_dec "$("$IPMITOOL" raw 0x30 0x45 0)" "-1")
   # Text for mode
   case "$MODE" in
      0) MODEt="Standard" ;;
      4) MODEt="HeavyIO" ;;
      2) MODEt="Optimal" ;;
      1) MODEt="Full" ;;
      *) MODEt="Unknown" ;;        # never leave a stale or unset value
   esac

   # Get reported fan speed in RPM from sensor data repository.
   # Takes the pertinent FAN line, then a number with 3 to 5
   # consecutive digits.  A fan without such a number gets an empty value.
   SDR=$("$IPMITOOL" sdr)
   for fan in FAN1 FAN2 FAN3 FAN4 FANA ; do
      printf -v "RPM_$fan" '%s' \
         "$(grep "$fan" <<< "$SDR" | grep -Eo '[0-9]{3,5}')"
   done

   # Print current Tmax, Tmean
   printf "^%-3d %5.2f" "${Tmax:-0}" "${Tmean:-0}"
}

##############################################
# function DRIVES_check
# Print time on new log line.
# Go through each drive, getting and printing
# status and temp, then call function manage_data.
# Globals read:    DEVLIST, SMARTCTL (optional path override)
# Globals written: TIME, LINE, DEVID, TEMP, RETURN, STATE,
#                  Tmax, Tsum, i
##############################################
function DRIVES_check {
   # Path of smartctl; set SMARTCTL beforehand to use a different binary.
   local smartctl=${SMARTCTL:-/usr/local/sbin/smartctl}

   echo  # start new line
   TIME=$(date "+%H:%M:%S"); echo -n "$TIME  "
   Tmax=0; Tsum=0  # initialize drive temps for new loop through drives
   i=0  # count number of spinning drives
   while IFS= read -r LINE ; do
      get_disk_name
      # Field 10 of the attribute line is the raw value; "+ 0" keeps only
      # its leading number, so "31 (Min/Max 20/45)" yields 31.
      TEMP=$("$smartctl" -a -n standby "/dev/$DEVID" \
             | awk '/Temperature_Celsius/ {print $10 + 0; exit}')
      # Second call without -a: only its exit status is wanted
      # (0 if spinning, 2 in standby), so its output is discarded.
      "$smartctl" -n standby "/dev/$DEVID" > /dev/null
      RETURN=$?               # need to preserve because $? changes with each test
      case $RETURN in
         0) STATE="*" ;;      # spinning
         2) STATE="_" ;;      # standby
         *) STATE="?" ;;      # state unknown
      esac
      printf "%s%-2d  " "$STATE" "${TEMP:-0}"
      # Update temperatures each drive; spinners only, and only when a
      # numeric temperature was actually read.
      if [[ $STATE == "*" && $TEMP =~ ^[0-9]+$ ]] ; then
         (( Tsum += TEMP ))
         # Numeric comparison; a string comparison would rank 9 above 10
         if (( TEMP > Tmax )) ; then Tmax=$TEMP ; fi
         (( i += 1 ))
      fi
   done <<< "$DEVLIST"
   manage_data  # manage data function
}

#####################################################
# All this happens only at the beginning
# Initializing values, list of drives, print header
#####################################################

# Ask for the interval until a positive whole number is given.  Anything
# else (empty, text, 0) would break the arithmetic in the main loop or
# make it spin without pausing.
while true ; do
   echo "How many whole minutes do you want between spin checks?"
   if ! read -r T ; then
      echo "No interval supplied; exiting." >&2
      exit 1
   fi
   if [[ $T =~ ^[0-9]+$ ]] && (( 10#$T > 0 )) ; then
      T=$(( 10#$T ))     # base 10, so input such as "08" is not read as octal
      break
   fi
   echo "Please enter a positive whole number of minutes."
done
SEC=$(( T * 60 ))			# interval in seconds
IPMITOOL=/usr/local/bin/ipmitool
# Get list of drives
DEVLIST1=$(/sbin/camcontrol devlist)
# Remove lines with flash drives or SSD; edit as needed
# You could use another strategy, e.g., find something in the camcontrol devlist 
# output that is unique to the drives you want, for instance only WDC drives:
# if [[ $LINE != *"WDC"* ]] . . .
DEVLIST="$(sed '/KINGSTON/d;/ADATA/d;/SanDisk/d' <<< "$DEVLIST1")"
DEVCOUNT=$(echo "$DEVLIST" | wc -l)
if [[ -z $DEVLIST ]] ; then
   echo "Warning: no drives left in the device list after filtering."
fi
printf "\n%s\n" "IMPORTANT NOTE ABOUT DUTY CYCLE (Fan%0 and Fan%1): Some boards apparently report incorrect duty cycle,
and can report duty cycle for zone 1 when that zone does not exist."

printf "\n                 Key to drive status symbols:  * spinning;  _ standby;  ? unknown"
print_header

###########################################
# Main loop through drives every T minutes
###########################################
while true ; do
	# Print header every quarter day.  The clock is converted to minutes
	# since midnight (10# forces base 10 so "08" or "09" is not seen as
	# octal); R is then the number of minutes since the last 6-hour mark.
	# Working in real minutes keeps the test right for T of 60 or more.
	HM=$(date +%H%M)
	R=$(( (10#${HM:0:2} * 60 + 10#${HM:2:2}) % 360 ))
	if (( R < T )) ; then print_header ; fi
	Tmax=0; Tsum=0  # initialize drive temps for new loop through drives
	DRIVES_check
	# sysctl -n prints only the value; keep the part before the decimal point
	CPU_TEMP=$(sysctl -n dev.cpu.0.temperature)
	CPU_TEMP=${CPU_TEMP%%.*}

	# Print data.  If a fan doesn't exist, RPM value will be null.  These
	# expressions substitute a value "---" if null so printing is not
	# messed up.  The other values are quoted and given a fallback too, so
	# one missing reading cannot shift the remaining columns.  Duty cycle
	# may be reported incorrectly by boards and they can report duty for
	# zone 1 even if there is no such zone.
	printf "%6.2f %3d %5s %5s %5s %5s %5s %5d %5d %-7s" \
		"${ERRc:-0}" "${CPU_TEMP:-0}" \
		"${RPM_FAN1:----}" "${RPM_FAN2:----}" "${RPM_FAN3:----}" \
		"${RPM_FAN4:----}" "${RPM_FANA:----}" \
		"${DUTY0:-0}" "${DUTY1:-0}" "${MODEt:----}"

	sleep "$(( T * 60 ))" # seconds between runs
done

# Logs:
#   - disk status (spinning or standby)
#   - disk temperature (Celsius) if spinning
#   - max and mean disk temperature
#   - current 'error' of Tmean from setpoint (for information only)
#   - CPU temperature
#   - RPM for FAN1-4 and FANA
#   - duty cycle for fan zones 0 and 1
#   - fan mode

# IMPORTANT NOTE ABOUT DUTY CYCLE: Some boards apparently report incorrect
# duty cycle, and can report duty cycle for zone 1 when that zone does not
# exist.
# Includes disks on motherboard and on HBA. 
# Uses joeschmuck's smartctl method (returns 0 if spinning, 2 in standby)
# https://forums.freenas.org/index.php?threads/how-to-find-out-if-a-drive-is-spinning-down-properly.2068/#post-28451
# Other method (camcontrol cmd -a) doesn't work with HBA
