Python Script to Monitor Drive Temps

Fox

Explorer
Joined
Mar 22, 2014
Messages
66
I have a Python script that parses the SmartCtl output, essentially tokenizes select fields of it, and prints all your drives and their drive temps automatically. I currently use it on FreeNAS and on my Windows box to list all my drives and temps on demand. The intent was to store the data in a SQLite database and then graph it so I can monitor drive temps over time, but I never got that far. In fact, it could also be used to track other SmartCtl data points.

In any case, if anyone wants it, I'm posting it below. It's python and really "simple" code to read. I should note that it was not tested on any other drives than my own, but should work out of the box. It works either from Windows (run it from a mapped or physical drive) and on FreeNAS command line.

Just didn't want the work to go to waste in case someone can use it. Note that it is based on a specific version of SmartCtl, so if SmartCtl gets a major update and the output changes, this script may not work. Currently, it is working on SmartCtl 6.2 (on my Windows box). Given the way SmartCtl was coded, I expect only a major redesign to change the output format.

Code:
import os,string

def Run_SmartCtl(strArgs):
    """Run ``smartctl`` with the given argument string and return its output.

    strArgs is appended verbatim to the command line, which os.popen hands
    to the shell, so only pass trusted text (device ids, fixed options).

    Returns the command's standard output as a list of lines without line
    terminators; the list is empty when the command printed nothing.
    """
    cmdstring = "smartctl " + strArgs
    pipe = os.popen(cmdstring)
    try:
        the_output = pipe.read()
    finally:
        # Close the pipe explicitly rather than relying on garbage collection.
        pipe.close()
    return the_output.splitlines()
def Get_Device_Ids():
    """Return the device ids listed by ``smartctl --scan``.

    The id is the first whitespace-delimited word of each output line.
    Blank lines are skipped so that an empty or failed scan cannot produce
    an empty device id that would later be passed back to smartctl.
    """
    Device_List = []
    for line in Run_SmartCtl("--scan"):
        words = line.split(None, 1)
        if not words:
            continue  # blank line: nothing to identify
        Device_List.append(words[0])
    return Device_List
def Get_Device_Info(device_id):
    """Return a DeviceRecord filled from the output of ``smartctl -i <device_id>``.

    Lines are ignored until one that, lower-cased, equals
    "=== start of information section ===". Every later line is split at
    its first colon and the lower-cased label is matched against the labels
    handled below; the text after the colon is stored stripped.
    """
    # Label before the colon -> name of the record attribute receiving the value.
    simple_labels = {
        "model family": "family",
        "device model": "model",
        "serial number": "serial",
        "firmware version": "firmware_version",
        "user capacity": "capacity",
        "sector sizes": "sector_sizes",
        "rotation rate": "rotation_rate",
        "device is": "device_is",
        "ata version is": "ata_version",
        "sata version is": "sata_version",
    }
    record = DeviceRecord()
    report = Run_SmartCtl("-i " + device_id)
    record.device_id = device_id
    reached_info = False
    for text in report:
        if not reached_info:
            if text.lower() == "=== start of information section ===":
                reached_info = True
            continue
        pieces = text.split(":", 1)
        label = pieces[0].lower()
        if label in simple_labels:
            setattr(record, simple_labels[label], pieces[1].strip())
            continue
        if label != "smart support is":
            continue
        # Only the first word of the value decides which flag changes.
        state = pieces[1].strip().split(" ", 1)[0].strip().lower()
        if state == "available":
            record.smart_support_available = True
        elif state == "unavailable":
            record.smart_support_available = False
            record.smart_support_enabled = False
        elif state == "enabled":
            record.smart_support_enabled = True
        elif state == "disabled":
            record.smart_support_enabled = False
    return record
def Get_Temp_Data(device_id):
    """Return the current temperature text from ``smartctl -l scttemp``.

    The output is searched for lines whose label (text before the first
    colon, compared case-insensitively) is "current temperature"; the
    stripped text after the colon of the last such line is returned.

    Returns "N/A" when no such line exists. Previously that case raised
    UnboundLocalError because the result variable was never assigned.
    """
    current_temp = "N/A"
    for line in Run_SmartCtl("-l scttemp " + device_id):
        label, colon, value = line.partition(":")
        if colon and label.lower() == "current temperature":
            current_temp = value.strip()
    return current_temp
class DeviceRecord(object):
    """Identity and SMART-support details collected for one drive.

    The class attributes below are the defaults; code that fills a record
    assigns the same names on the instance, which leaves the defaults of
    other instances untouched.
    """
    device_id = ""
    family = ""
    model = ""
    serial = ""
    firmware_version = ""
    capacity = ""
    sector_sizes = ""
    rotation_rate = ""
    device_is = ""
    ata_version = ""
    sata_version = ""
    smart_support_available = False
    smart_support_enabled = False

    def __repr__(self):
        # Makes printed records readable when debugging device lists.
        return "DeviceRecord(device_id=%r, model=%r, serial=%r)" % (
            self.device_id, self.model, self.serial)
def RemoveDupes(devices):
    """Return the records of ``devices`` with repeated serials removed.

    The first record seen for each ``serial`` value is kept and the input
    order is preserved. Records are compared by serial only, so records
    sharing the same (possibly empty) serial collapse to the first one.
    """
    seen_serials = set()
    newlist = []
    for device in devices:
        if device.serial in seen_serials:
            continue
        seen_serials.add(device.serial)
        newlist.append(device)
    return newlist
def RemoveDisabledUnsupported(devices):
    """Return the records whose SMART support is both available and enabled.

    Input order is preserved; the input list itself is not modified.
    """
    return [
        record
        for record in devices
        if record.smart_support_available == True
        and record.smart_support_enabled == True
    ]
###################################################################################################

# Collect one record for every device id that the scan reports.
device_list = Get_Device_Ids()
if device_list == []:
    print("No devices found.")
list_of_records = [Get_Device_Info(strDevice) for strDevice in device_list]

# Drop repeated serials first, then drives without usable SMART support.
newlist = RemoveDupes(list_of_records)
devices_new = RemoveDisabledUnsupported(newlist)

# One fixed-width row per remaining drive: family, model, current temperature.
row_format = "{0:40}{1:30}{2:15}"
for x in devices_new:
    print(row_format.format(x.family, x.model, Get_Temp_Data(x.device_id)))
raw_input("Press Enter to continue...")
 

DrKK

FreeNAS Generalissimo
Joined
Oct 15, 2013
Messages
3,630
Darn it, I was hoping this was a rickroll that wiped the unsuspecting user's hard drives.
 

Knowltey

Patron
Joined
Jul 21, 2013
Messages
430
Darn it, I was hoping this was a rickroll that wiped the unsuspecting user's hard drives.
I'm sure one of the citizens of your glorious nation could arrange that for you if you "ask" them.
 

Fox

Explorer
Joined
Mar 22, 2014
Messages
66
No.. Sorry to disappoint, it only plays Rick Astley's "Never Gonna Give You Up". :smile: (And yes, I'm kidding)....
 

cyberjock

Inactive Account
Joined
Mar 25, 2012
Messages
19,526

gusgus

Dabbler
Joined
Sep 15, 2014
Messages
13
Thanks for sharing your script, Fox. I expanded the functions to retrieve all current values from smartctl -a output. Overall the script only emails SMART drive temps and error status (I know this is redundant but I prefer to receive an email even when things are ok), but the function supports most of the standard SMART data so one could adapt it to do more, such as storing in a database as Fox suggested. I do realize that it's not as concise and simple as it could be but it works so I'm moving on. I basically just wanted something to monitor drive temps over time and also warn me of any dangerous temperature conditions.

I tested the script quite a bit but if anyone finds a bug please let me know. I plan on building on this code some day so please excuse all my commented out debug probes. In case you were wondering: the odd method of setting writestring = blah followed by file.write(writestring) will hopefully make it easier to update later for Python 3, which supports printing to files (avoiding annoying carriage return and other formatting hassles), which will make it much easier to flip back and forth to printing in the console window and printing to sendmail.

Note: Written and tested in FreeNAS 9.2.1.7 x64.

EDIT: You may notice that I put some crude warning messages in the email alerts, to warn in the event of a hard drive having SMART errors. As it is currently written (as of 11/21/14), it will not raise an error if the drive simply dies such that it does not respond to SMART requests. FreeNAS will throw its own security/file system change warning emails, kind of, but this isn't a complete feature of the script and eventually I'll fix it. When I do, I'll be moving it to GitHub and will post it in this thread.

Code:
import os,string,time,sys

# Path of the sendmail binary; the report is piped to "<sendmail> -t -i".
sendmail = "/usr/sbin/sendmail" # sendmail location
# File that gets a line appended when closing the sendmail pipe returns a status other than 0.
customscriptlog_location = "/var/log/customscriptlog"

def Run_SmartCtl(strArgs):
    """Run ``smartctl`` with the given argument string and return its output.

    strArgs is appended verbatim to the command line, which os.popen hands
    to the shell, so only pass trusted text (device ids, fixed options).

    Returns the command's standard output as a list of lines without line
    terminators; the list is empty when the command printed nothing.
    """
    cmdstring = "smartctl " + strArgs
    pipe = os.popen(cmdstring)
    try:
        the_output = pipe.read()
    finally:
        #close the pipe explicitly rather than relying on garbage collection
        pipe.close()
    return the_output.splitlines()
 
def Get_Device_Ids():
    """Return the device ids listed by ``smartctl --scan``.

    The id is the first whitespace-delimited word of each output line.
    Blank lines are skipped so that an empty or failed scan cannot produce
    an empty device id that would later be passed back to smartctl.
    """
    Device_List = []
    for line in Run_SmartCtl("--scan"):
        words = line.split(None, 1)
        if not words:
            continue #blank line: nothing to identify
        Device_List.append(words[0])
    return Device_List
 
def Get_Device_Record(device_id):
    """Return a DeviceRecord filled from the output of ``smartctl -i <device_id>``.

    Until a line containing "information section" (case-insensitive) is
    seen, lines are only inspected for the smartctl version: when the first
    space-separated word is "smartctl", the second word is stored. After
    that, each line is split at its first colon and the lower-cased label
    is matched against the labels handled below.
    """
    #label before the colon -> record attribute that receives the stripped value
    text_fields = {
        "model family": "family",
        "device model": "model",
        "serial number": "serial",
        "firmware version": "firmware_version",
        "user capacity": "capacity",
        "sector sizes": "sector_sizes",
        "rotation rate": "rotation_rate",
        "device is": "device_is",
        "ata version is": "ata_version",
        "sata version is": "sata_version",
    }
    #first word after "SMART support is:" -> (attribute, value) to assign
    support_states = {
        "available": ("smart_support_available", True),
        "unavailable": ("smart_support_available", False),
        "enabled": ("smart_support_enabled", True),
        "disabled": ("smart_support_enabled", False),
    }
    record = DeviceRecord()
    output_lines = Run_SmartCtl("-i " + device_id)
    record.device_id = device_id
    past_banner = False
    for text in output_lines:
        if not past_banner:
            words = text.split(" ", 2)
            if words[0].lower() == 'smartctl':
                record.smartctl_version = words[1]
            if "information section" in text.lower():
                past_banner = True
            continue
        parts = text.split(":", 1)
        label = parts[0].lower()
        if label in text_fields:
            setattr(record, text_fields[label], parts[1].strip())
        elif label == "smart support is":
            state = parts[1].strip().split(" ", 1)[0].strip().lower()
            if state in support_states:
                attribute, flag = support_states[state]
                setattr(record, attribute, flag)
    return record
 
def Get_Temp_Data(dev):
    """Fill in dev's temperature fields and replace placeholders with "None".

    -273 in dev.current_temp_celsius / dev.max_temp_celsius means "not
    retrieved yet". When the current temperature is still -273,
    ``smartctl -l scttemp`` is queried and the first word after
    "Current Temperature:" is stored (as a string) if it is a whole number.

    Afterwards:
      * no current temperature -> both fields become the string "None";
      * a maximum of 0 or -273 -> the maximum becomes "None". The -273 case
        occurs when the current temperature came from scttemp and nothing
        ever set a maximum; leaving the placeholder in place would make any
        later numeric comparison against the maximum meaningless.

    Returns dev (modified in place).
    """
    not_retrieved = -273
    if dev.current_temp_celsius == not_retrieved:
        for line in Run_SmartCtl("-l scttemp " + dev.device_id):
            field = line.split(":", 1)
            if len(field) == 2 and field[0].lower() == "current temperature":
                #keep the number only, dropping the temperature units
                reading = field[1].split()
                if reading and reading[0].lstrip("-").isdigit():
                    dev.current_temp_celsius = reading[0]
    if dev.current_temp_celsius == not_retrieved: #device does not support drive temp
        dev.current_temp_celsius = "None"
        dev.max_temp_celsius = "None"
    elif dev.max_temp_celsius in (0, not_retrieved): #device does not list max temp threshold
        dev.max_temp_celsius = "None"
    return dev
 
def Sort_Smart_Test_Logs(dev,full_output):
    """Copy selected values from ``smartctl -a`` output lines onto ``dev``.

    full_output is a list of output lines. Three sections are recognised by
    a case-insensitive substring of their first line:

      * "smart attributes data structure": for the attribute names listed
        below, the 10th column (raw value) is stored on
        dev.smartattrib_<lower-cased name>. Airflow_Temperature_Cel also
        stores its 6th column (threshold) and, when dev.current_temp_celsius
        is still -273, seeds dev.max_temp_celsius / dev.current_temp_celsius.
        The section ends at the first blank line.
      * "smart error log version": dev.smart_error_logged becomes "No" on a
        "no errors logged" line and "YES!" on any other non-blank line; the
        section ends at the first blank line.
      * "smart self-test log structure": every entry line ("# 1 ..." or
        "#10 ...") updates dev.smart_selftests_in_log with its number and
        dev.smart_selftest_failed with "None" (line contains "without
        error") or "Test <number> failed".

    Returns dev (modified in place).
    """
    #attributes whose raw value is stored verbatim on dev.smartattrib_<name>
    simple_attributes = (
        "reallocated_sector_ct",
        "power_on_hours",
        "power_cycle_count",
        "wear_leveling_count",
        "used_rsvd_blk_cnt_tot",
        "program_fail_cnt_total",
        "erase_fail_count_total",
        "runtime_bad_block",
        "reported_uncorrect",
        "hardware_ecc_recovered",
        "udma_crc_error_count",
        "unknown_attribute",
        "total_lbas_written",
    )
    in_attributes = False
    in_error_log = False
    in_selftest_log = False
    for line in full_output:
        lowered = line.lower()
        tokens = line.split() #empty list for blank or whitespace-only lines

        #sort SMART attributes section
        if not in_attributes and "smart attributes data structure" in lowered:
            in_attributes = True
        if in_attributes:
            if not tokens:
                in_attributes = False #blank line closes the attribute table
            elif len(tokens) >= 10: #only full table rows carry a raw value
                name = tokens[1].lower()
                if name in simple_attributes:
                    setattr(dev, "smartattrib_" + name, tokens[9])
                elif name == "airflow_temperature_cel":
                    dev.smartattrib_airflow_temperature_cel_threshold = tokens[5]
                    dev.smartattrib_airflow_temperature_cel_value = tokens[9]
                    #store device temp if it hasn't already been done
                    if dev.current_temp_celsius == -273:
                        dev.max_temp_celsius = int(tokens[5]) #removes leading zeros
                        dev.current_temp_celsius = tokens[9]

        #sort SMART error log section
        if not in_error_log and "smart error log version" in lowered:
            in_error_log = True
        if in_error_log:
            if not tokens:
                in_error_log = False #blank line closes the error log section
            elif "no errors logged" in lowered:
                dev.smart_error_logged = "No"
            else:
                dev.smart_error_logged = "YES!"

        #sort SMART self-test log section
        if not in_selftest_log and "smart self-test log structure" in lowered:
            in_selftest_log = True
        if in_selftest_log and tokens:
            #entries below 10 print as "# 1", entries from 10 upwards as "#10"
            entry_number = None
            if tokens[0] == "#":
                if len(tokens) > 1:
                    entry_number = tokens[1]
            elif tokens[0].startswith("#") and tokens[0][1:].isdigit():
                entry_number = tokens[0][1:]
            if entry_number is not None:
                #NOTE(review): every entry overwrites the previous result, so the
                #status reflects the last entry listed - confirm that is intended
                dev.smart_selftests_in_log = entry_number
                if "without error" in lowered:
                    dev.smart_selftest_failed = "None"
                else:
                    dev.smart_selftest_failed = "Test " + entry_number + " failed"
    return dev
 
class DeviceRecord(object):
    """Everything the script collects about one drive.

    The class attributes below are the defaults; code that fills a record
    assigns the same names on the instance, which leaves the defaults of
    other instances untouched.
    """
    #version of SmartCtl used to retrieve device records
    smartctl_version = 0.0

    #SMART device properties
    device_id = ""
    family = ""
    model = ""
    serial = ""
    firmware_version = ""
    capacity = ""
    sector_sizes = ""
    rotation_rate = ""
    device_is = ""
    ata_version = ""
    sata_version = ""
    smart_support_available = False
    smart_support_enabled = False
    #-273 marks a temperature that has not been retrieved yet
    current_temp_celsius = -273
    max_temp_celsius = -273

    #SMART attribute values
    smartattrib_reallocated_sector_ct = ""
    smartattrib_power_on_hours = ""
    smartattrib_power_cycle_count = ""
    smartattrib_wear_leveling_count = ""
    smartattrib_used_rsvd_blk_cnt_tot = ""
    smartattrib_program_fail_cnt_total = ""
    smartattrib_erase_fail_count_total = ""
    smartattrib_runtime_bad_block = ""
    smartattrib_reported_uncorrect = ""
    smartattrib_airflow_temperature_cel_value = ""
    smartattrib_airflow_temperature_cel_threshold = ""
    smartattrib_hardware_ecc_recovered = ""
    smartattrib_udma_crc_error_count = ""
    smartattrib_unknown_attribute = ""
    smartattrib_total_lbas_written = ""

    #SMART error logs
    smart_error_logged = ""

    #SMART self-test log
    smart_selftests_in_log = ""
    smart_selftest_failed = ""

    def __repr__(self):
        #makes printed records readable when debugging device lists
        return "DeviceRecord(device_id=%r, model=%r, serial=%r)" % (
            self.device_id, self.model, self.serial)

def RemoveDupes(device_records):
    """Return the records of ``device_records`` with repeated serials removed.

    The first record seen for each ``serial`` value is kept and the input
    order is preserved. Records are compared by serial only, so records
    sharing the same (possibly empty) serial collapse to the first one.
    """
    seen_serials = set()
    dedupedlist = []
    for device in device_records:
        if device.serial in seen_serials:
            continue
        seen_serials.add(device.serial)
        dedupedlist.append(device)
    return dedupedlist
 
def RemoveDisabledUnsupported(devices):
    """Return the records whose SMART support is both available and enabled.

    Input order is preserved; the input list itself is not modified.
    """
    return [
        record
        for record in devices
        if record.smart_support_available == True
        and record.smart_support_enabled == True
    ]

#########################################################################

#enumerate devices, get their device_ids (e.g. /dev/da0)
alldeviceids = Get_Device_Ids()
if len(alldeviceids) == 0:
    print("No devices found.")

#look up the properties of every device that was found
list_of_device_records = [Get_Device_Record(device_id) for device_id in alldeviceids]

#drop records that repeat the serial number of an earlier record
dedupedlist = RemoveDupes(list_of_device_records)

#keep only devices that report SMART support as both available and enabled
compatibledevices = RemoveDisabledUnsupported(dedupedlist)
if len(compatibledevices) == 0:
    print("No compatible devices.")

#debug probe:
#print "Total device IDs found = ", len(alldeviceids)
#print "Contents of alldeviceids:"
#print '[%s]' % ', '.join(map(str, alldeviceids))
#print "--------------------------------------------------------------"
#print "Number of items in list_of_device_records = ", len(list_of_device_records)
#print "Contents of list_of_device_records:"
#for line in list_of_device_records:
    #print line
#print "--------------------------------------------------------------"
#print "Number of lines in deduped device records = ", len(dedupedlist)
#print "Contents of dedupedlist:"
#for line in dedupedlist:
    #print line
#print "--------------------------------------------------------------"
#print "Number of compatible devices = ", len(compatibledevices)
#print "Contents of compatibledevices:"
#for line in compatibledevices:
    #print line
#print "--------------------------------------------------------------"


#parse the full "smartctl -a" report of every compatible device into its record
for dev in compatibledevices:
    full_report = Run_SmartCtl("-a " + dev.device_id)
    Sort_Smart_Test_Logs(dev, full_report)
    #debug probe
    #print "--------------------------------------------------------------"
    #print "Logs obtained for Device ID = " + dev.device_id
    #print "SMART attribute values:"
    #print "dev.smartattrib_reallocated_sector_ct = " + dev.smartattrib_reallocated_sector_ct
    #print "dev.smartattrib_power_on_hours = " + dev.smartattrib_power_on_hours
    #print "dev.smartattrib_wear_leveling_count = " + dev.smartattrib_wear_leveling_count
    #print "dev.smartattrib_used_rsvd_blk_cnt_tot = " + dev.smartattrib_used_rsvd_blk_cnt_tot
    #print "dev.smartattrib_program_fail_cnt_total = " + dev.smartattrib_program_fail_cnt_total
    #print "dev.smartattrib_erase_fail_count_total = " + dev.smartattrib_erase_fail_count_total
    #print "dev.smartattrib_runtime_bad_block = " + dev.smartattrib_runtime_bad_block
    #print "dev.smartattrib_reported_uncorrect = " + dev.smartattrib_reported_uncorrect
    #print "dev.smartattrib_airflow_temperature_cel_value = " + dev.smartattrib_airflow_temperature_cel_value
    #print "dev.smartattrib_airflow_temperature_cel_threshold = " + dev.smartattrib_airflow_temperature_cel_threshold
    #print "dev.smartattrib_hardware_ecc_recovered = " + dev.smartattrib_hardware_ecc_recovered
    #print "dev.smartattrib_udma_crc_error_count = " + dev.smartattrib_udma_crc_error_count
    #print "dev.smartattrib_unknown_attribute = " + dev.smartattrib_unknown_attribute
    #print "dev.smartattrib_total_lbas_written = " + dev.smartattrib_total_lbas_written
    #print "SMART Error Log:"
    #print "dev.smart_error_logged = " + dev.smart_error_logged
    #print "SMART Self-Test Log:"
    #print "dev.smart_selftests_in_log = " + dev.smart_selftests_in_log
    #print "dev.smart_selftest_failed = ", dev.smart_selftest_failed
    #raw_input ("paused")

#check device_ids for SMART errors or overtemp condition, also truncate dev.model entries to 11 characters
DEFAULT_MAX_TEMP_CELSIUS = 43 #limit used when the drive reports no usable maximum
TEMP_WARNING_MARGIN = 2 #warn once the drive is within this many degrees of its maximum
device_ids_with_smart_errors = []
device_ids_too_hot = []
device_ids_no_temp_support = []
for dev in compatibledevices:
    #truncate dev.model to 11 characters so it fits the report column
    dev.model = dev.model[:11]
    Get_Temp_Data(dev) #set unsupported temp fields to None

    #the self-test status may have been stored as a tuple of text pieces;
    #normalise it to one string so .lower() and later formatting cannot fail
    selftest_status = dev.smart_selftest_failed
    if isinstance(selftest_status, (tuple, list)):
        selftest_status = "".join(str(piece) for piece in selftest_status)
        dev.smart_selftest_failed = selftest_status

    #anything other than an explicit "None" self-test result counts as a problem
    #(the original chained comparison evaluated to exactly this)
    if "yes" in dev.smart_error_logged.lower() or selftest_status.lower() != "none":
        device_ids_with_smart_errors.append(dev.device_id)

    #temperatures are either numbers (possibly as text) or the placeholder "None"
    try:
        current_temp = int(dev.current_temp_celsius)
    except (TypeError, ValueError):
        current_temp = None
    try:
        max_temp = int(dev.max_temp_celsius)
    except (TypeError, ValueError):
        max_temp = None
    if max_temp is not None and max_temp <= 0:
        max_temp = None #0 and -273 are "not reported" placeholders, not real limits

    if current_temp is None: #temp is not at all supported (or unreadable)
        device_ids_no_temp_support.append(dev.device_id)
    elif max_temp is None: #only max temp is unsupported
        if current_temp > DEFAULT_MAX_TEMP_CELSIUS:
            device_ids_too_hot.append(dev.device_id)
    elif current_temp > max_temp - TEMP_WARNING_MARGIN: #temp and max temp is supported
        device_ids_too_hot.append(dev.device_id)
        print(dev.device_id + " too hot. temp =  " + str(current_temp) + " , max-2 =  " + str(max_temp - TEMP_WARNING_MARGIN))
 
#build the whole message first, so a failure while formatting cannot leave a
#half-written email in the sendmail pipe
separator = "==============================================================================================\n"
row_format = "{0:12}{1:13}{2:16}{3:10}{4:12}{5:14}{6:18}\n"

if device_ids_with_smart_errors:
    subject = "SMART ERRORS DETECTED - local.freenas"
elif device_ids_too_hot:
    subject = "DRIVES TOO HOT - local.freenas"
else:
    subject = "SMART OK - local.freenas - drive temps & SMART logs"

#compatibledevices can be empty, so it must not be indexed unconditionally
if compatibledevices:
    smartctl_version = str(compatibledevices[0].smartctl_version)
else:
    smartctl_version = "unknown"
python_version = ".".join(str(part) for part in sys.version_info[:3])

message = [
    "To: [EMAIL ADDRESS]\n",
    "Subject: " + subject + "\n",
    "\n",
    #general system info
    separator,
    "Output for smartstatus.py " + time.strftime("%m/%d/%y %H:%M:%S") + "\n",
    "SmartCtl version = " + smartctl_version + " (script written for SmartCtl 6.2)\n",
    "Python version = " + python_version + " (script written for Python 2.7)\n",
    "Total devices detected = " + str(len(alldeviceids)) + "\n",
    "SMART compatible devices = " + str(len(compatibledevices)) + "\n",
    separator,
    row_format.format("Device ID", "Model", "Serial", "Temp/Max", "SMART Logs", "SMART Errors", "Self-Test Errors"),
]
#stats for each device
for x in compatibledevices:
    message.append(row_format.format(
        x.device_id, x.model, x.serial,
        str(x.current_temp_celsius) + "/" + str(x.max_temp_celsius),
        x.smart_selftests_in_log, x.smart_error_logged,
        str(x.smart_selftest_failed)))
message.append(separator)

#some info about problems, if any
message.append("Automated warnings: (even if nothing is here then there still may be bad things happening)\n")
if len(compatibledevices) < len(alldeviceids):
    #devices found minus devices kept (the reverse subtraction gave a negative count)
    int_unsupported_devices = len(alldeviceids) - len(compatibledevices)
    message.append("\n")
    message.append("Warning: " + str(int_unsupported_devices) + " unsupported devices were detected. This may be OK, or it may indicate that the script isn't executing properly, or doesn't match the version of smartctl that it was developed on. Examine devices with smartctl in a shell prompt to investigate.\n")
if device_ids_with_smart_errors:
    message.append("\n")
    for bad_id in device_ids_with_smart_errors:
        message.append("WARNING! Device " + bad_id + " has error(s)!\n")
    message.append("Type 'smartctl -a /device/ID' in server shell for more info!\n")
if device_ids_too_hot:
    for hot_id in device_ids_too_hot:
        message.append("WARNING! Device " + hot_id + " is running hot!\n")
    message.append("Type 'smartctl -a /device/ID' in server shell for more info!\n")
if device_ids_no_temp_support:
    message.append("\n")
    message.append("*Notice: The following SMART device(s) do not support SMART drive temperatures:\n")
    message.append('[%s]' % ', '.join(map(str, device_ids_no_temp_support)) + "\n")

#open pipe to sendmail; sendmail sends upon closing
smartstatus_email = os.popen("%s -t -i" % sendmail, "w")
try:
    smartstatus_email.write("".join(message))
finally:
    smstatus = smartstatus_email.close()

#close() returns None when the command exited with status 0 and the exit
#status otherwise, so None must be treated as success
if smstatus not in (None, 0):
    #if sendmail returns error, complain into customscriptlog
    with open(customscriptlog_location, "a") as customscriptlog: # a = append file
        customscriptlog.write("smartstatus.py: " + time.strftime("%m/%d/%y %H:%M:%S") + " Sendmail exit status = " + str(smstatus) + "\n")
    print("Sendmail exit status has error code = " + str(smstatus))

 
Last edited:

Fox

Explorer
Joined
Mar 22, 2014
Messages
66
Cool.. Thanks for sharing it.. I will try it when I get a chance, though given my schedule, I am not sure when that will be..
 

Nicholas

Dabbler
Joined
Dec 31, 2013
Messages
13
Hi, just to mess around. A couple of weeks ago I found this script in a SourceForge project; it reads the sysctl output and prints it in a fixed IP. It ends up looking like this:

Capture.jpg


http://sourceforge.net/projects/freenas-tempreport/
 

Ryan Allen

Explorer
Joined
Oct 11, 2016
Messages
93
I would love to see a graph! Good work everybody! I’m going to try this tonight!
 

Plato

Contributor
Joined
Mar 24, 2016
Messages
101
Let's wake up the thread. I'd also like to see this on a graph. Any way to append this to the report pages on FreeNas?
 

Ericloewe

Server Wrangler
Moderator
Joined
Feb 15, 2014
Messages
20,175
Great work,
any way of getting this into a graph on the "reporting" page?
I believe there is a ticket for that, which would probably be scheduled for 11.2.
 

schwarci

Cadet
Joined
Nov 9, 2014
Messages
4
How to adjust the code for SAS HDD?
 
Top