#!/bin/sh
#

# monitor_hdd_temp.sh
#
# This is a simple script that monitors disk temperatures on a FreeBSD server.
#
# It creates two files for each detected drive with SMART capabilities:
#       <drive>.old -> keeps the temperature for the previous run
#       <drive>.new -> keeps the temperature for the current run
#
# The first time the script is run, both files are created with the current temperature.
#
# Crontab entries control when the script is executed.
#
# If the drive temperature exceeds the temperature defined @ max_temp variable or 
# the temperature change exceeds the threshold defined @ max_dif variable, an alert 
# e-mail will be sent to the e-mail address defined @ root_email variable.
#
# The script uses the following variables:
#
# smartctl      -> full path to 'smartctl' program
# data_dir      -> directory to keep created drives' temperature history
# max_temp      -> max drive temperature for pre-fail alert (red alert!)
# alert_temp    -> alert temperature (getting hot -- yellow alert!)
# max_dif       -> threshold temperature changes
# root_email    -> to e-mail for alerts
# from_email    -> from e-mail for alerts (needed in some cases to prevent alerts going to spam)
# email_subject -> subject for alert e-mail
# server_name   -> name of this server
# email_body    -> actual e-mail text
# log_size      -> Max number of lines before log is rotated
#
# 
# get_smart_drives function and drives temperature routine developed by Keith Nash
#
# Below license:
#
# ----------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2017 by Keith Nash
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ------------------------------------------------------------------------------
#
# Above license also applies to this script
#
# Copyright (c) 2017 by Aloisio Mello
#
# Feedback welcome: al_mello@hotmail.com
#

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Absolute path of the 'smartctl' binary.
smartctl="/usr/local/sbin/smartctl"

# Short host name, upper-cased; it is embedded in the alert e-mail subjects.
server_name="$(hostname -s | tr '[:lower:]' '[:upper:]')"

# Directory holding the per-drive temperature history, the alert drafts and
# the log file.
data_dir="/raid/scripts/hddtemp"

# Red alert: a reading above this value (Celsius) is reported as critical.
max_temp="42"

# Yellow alert: a reading above this value (Celsius) is reported as a warning.
alert_temp="37"

# Largest temperature rise (Celsius) tolerated between two runs.
max_dif="2"

# Recipient of the alert e-mails.
root_email="al_mello@hotmail.com"

# Sender address used in the alert e-mails (helps keeping them out of spam).
from_email="mello@domain.com"

# Placeholder subject and body; both are overwritten before an alert is sent,
# so seeing these texts in a mail means something went wrong.
email_subject="Subject haven't be updated. Report please!"
email_body="Body haven't be updated. Report please!"

# Log file location and the number of lines after which it is rotated.
log_file="${data_dir}/hddtemp.log"
log_size="2000"

###############################
# Log
###############################

# log MESSAGE
#
# Appends "<timestamp> monitor_hdd_temp: MESSAGE" to the file named by the
# global $log_file. If that file does not exist yet, it is created first with
# a "Log file created" entry carrying the same timestamp.
#
# Arguments: $1 - text to record
# Globals:   log_file (read)
log()
{
  lg_timestamp=$(date +"[%Y-%m-%d %H:%M:%S]")
  lg_message="$1"

  # $log_file is quoted everywhere so a path containing whitespace works.
  if [ ! -e "$log_file" ]; then
    printf '%s\n' "$lg_timestamp monitor_hdd_temp: Log file created" > "$log_file"
  fi

  # printf instead of echo: the message is written verbatim, whatever it contains.
  printf '%s\n' "$lg_timestamp monitor_hdd_temp: $lg_message" >> "$log_file"
}


################################
# Determine smart enabled drives
################################

# get_smart_drives
#
# Prints, on one line, the devices reported by 'smartctl --scan' whose
# 'smartctl -i' output contains the line "SMART support is: Enabled".
# Each device is preceded by a single space, so the line starts with a blank
# (or is empty when no device qualifies).
#
# Globals: smartctl (read)
get_smart_drives()
{
  gs_found=""

  # First column of every scan line is the device node.
  gs_candidates=$("$smartctl" --scan | awk '{print $1}')

  for gs_dev in $gs_candidates; do
    # Fourth word of the matching line is the support state.
    gs_state=$("$smartctl" -i "$gs_dev" | awk '/SMART support is: Enabled/ {print $4}')
    case "$gs_state" in
      Enabled) gs_found="$gs_found $gs_dev" ;;
    esac
  done

  printf '%s\n' "$gs_found"
}

##########################
# Send e-mail
##########################

# send_email
#
# Writes the alert message for the current drive to
# "$data_dir/<drive basename>.alert" and hands that file to 'sendmail -t',
# which takes the recipient from the "To:" header.
#
# Globals: data_dir, drive, root_email, from_email, email_subject,
#          email_body (all read); cover (written: path of the alert file)
# Returns: status of sendmail, or 1 when the alert file cannot be written.
send_email()
{
  cover="$data_dir/$(basename "$drive").alert"

  # Headers, then a truly empty line (the header/body separator required by
  # RFC 5322; a line holding a blank is header folding, not a separator),
  # then the body. The body is quoted so its spacing and any glob characters
  # are sent verbatim.
  {
    printf '%s\n' "To: ${root_email}"
    printf '%s\n' "Subject: ${email_subject}"
    printf '%s\n' "From: ${from_email}"
    printf '\n'
    printf '%s\n' "${email_body}"
  } > "$cover" || {
    printf '%s\n' "send_email: cannot write $cover" >&2
    return 1
  }

  sendmail -t < "$cover"
}

############################
# Main
############################

############################
# Rotate log
############################

# The history files, alert drafts and the log all live in $data_dir; without
# it nothing below can work, so stop right away if it cannot be created.
if ! mkdir -p "$data_dir"; then
  printf '%s\n' "monitor_hdd_temp: cannot create data directory $data_dir" >&2
  exit 1
fi
log "Start"

# Rotate the log: once it holds more than $log_size lines it is truncated
# (previous content is discarded) and a rotation entry is written.
logsize=$(wc -l < "$log_file")
if [ "$logsize" -gt "$log_size" ]; then
  # ':' truncates without leaving a stray blank first line in the new log.
  : > "$log_file"
  log "Log file rotated"
fi

log "Getting list of drives"
drives=$(get_smart_drives)

# Number of alerts raised during this run.
errors=0

#############################
# Drive temperatures:
#############################

# For every SMART-enabled drive: read its temperature, update the history
# files (<drive>.old = previous run, <drive>.new = current run) and raise
# e-mail alerts when a limit is exceeded.
# $drives is expanded unquoted on purpose: it is a blank-separated list.
for drive in $drives; do
  drive_name=$(basename "$drive")

  # Run each smartctl report once and parse the cached text for every field.
  drive_info=$("$smartctl" -i "$drive")
  drive_attrs=$("$smartctl" -A "$drive")

  serial=$(printf '%s\n' "$drive_info" | awk '/Serial Number/ {print $3}')
  # capacity is only used by the debugging printf below.
  capacity=$(printf '%s\n' "$drive_info" | awk '/User Capacity/ {print $5 $6}')
  # Attribute 194 first; fall back to attribute 190 when 194 is not reported.
  temp=$(printf '%s\n' "$drive_attrs" | awk '/194 Temperature/ {print $10}')
  if [ -z "$temp" ]; then
    temp=$(printf '%s\n' "$drive_attrs" | awk '/190 Airflow_Temperature/ {print $10}')
  fi
  dfamily=$(printf '%s\n' "$drive_info" | awk '/Model Family/ {print $3, $4, $5, $6, $7}' | sed -e 's/[[:space:]]*$//')
  dmodel=$(printf '%s\n' "$drive_info" | awk '/Device Model/ {print $3, $4, $5, $6, $7}' | sed -e 's/[[:space:]]*$//')
  if [ -z "$dfamily" ]; then
    dinfo="$dmodel"
  else
    dinfo="$dfamily ($dmodel)"
  fi
  #printf '%6.6s: %5s %-8s %-20.20s %s\n' "$drive_name" "$temp" "$capacity" "$serial" "$dinfo"
  old_temp="$data_dir/$drive_name.old"
  new_temp="$data_dir/$drive_name.new"
  log "Processing: $drive: $dmodel - $serial - temp ${temp}C"

  # Without a plain integer reading none of the comparisons below can work
  # (and the history files would be filled with junk), so skip the drive.
  case "$temp" in
    ''|*[!0-9]*)
      log "Skipping $drive: no valid temperature reading"
      continue
      ;;
  esac

  #
  # Creates/updates drives' temperature files
  #

  email_subject=""
  email_body=""

  if [ -e "$new_temp" ]; then
    # The reading stored by the previous run becomes the history value.
    cp "$new_temp" "$old_temp"
  else
    # First run for this drive: seed the history with the current reading.
    printf '%s\n' "$temp" > "$old_temp"
  fi

  # Read the history only AFTER the rotation above, so the delta is always
  # computed against the previous run and not against the run before it.
  old_hist=$(cat "$old_temp")
  case "$old_hist" in
    # Unusable history (empty/corrupt file): treat as "no change".
    ''|*[!0-9]*) old_hist=$temp ;;
  esac

  printf '%s\n' "$temp" > "$new_temp" # Update current temp
  temp_dif=$((temp - old_hist))

  ############################
  # Alerts
  ############################

  #
  # Alert temperature (yellow alert)
  #

  if [ "$temp" -gt "$alert_temp" ]; then
    email_subject="Drive on server $server_name reached alert temperature! "
    email_body="Drive $drive current temperature (${temp}C), exceeded alert temperature (${alert_temp}C)."
    send_email
    log "$email_body"
    errors=$((errors + 1))
  fi

  #
  # Critical temperature (red alert)
  #

  if [ "$temp" -gt "$max_temp" ]; then
    email_subject="Drive on server $server_name reached critical temperature!"
    email_body="Drive $drive current temperature (${temp}C), exceeded max configured (${max_temp}C)."
    send_email
    log "$email_body"
    errors=$((errors + 1))
  fi

  #
  # Temperature rise since the previous run above the threshold
  #

  if [ "$temp_dif" -gt "$max_dif" ]; then
    email_subject="Server $server_name $drive temperature changed alert! "
    email_body="$drive temperature changed above threshold. Current: $temp C, previous $old_hist C"
    send_email
    log "$email_body"
    errors=$((errors + 1))
  fi

done

# Closing log entries: report how many alerts were raised during this run,
# or state that there were none.
if ! [ "$errors" -eq "0" ]; then
  log "Total alerts: $errors"
  log "Finished"
else
  log "Finished. No temperature issues."
fi