#!/bin/ksh
#
# prtdiag: Sun Enterprise Server check - BB external script test
#
# version 2.0
# version 2.1 - now checks for failures, faults listed in prtdiag output
#               as well as bad fans, power supplies, front lights, etc
# version 2.2 - works for E450 now, not just 3500-6500 class
# version 2.3 - works for E250 now.
# version 2.4 - properly uses $CAT instead of cat
# version 2.5 - properly uses $MACHINE instead of `uname -n` in bb-hosts check
# version 2.6 - properly uses $THIS_HOST instead of $MACHINE due to fqdn
#               using comma in name - thanks Craig Cook
# version 2.7 - works for SunBlade now.
# version 2.8 - works for Netra T1 (AC 200 specifically) now
# version 2.9 - fixed bug that didn't display output anyway even if command
#               reports exit of non-zero
# version 2.10- problem on LOM servers where faulty wasn't shown in red
#               also fixed some grep/awk that were not $GREP/$AWK in LOM checks
# version 2.11- modified Enterprise - minimum power available messages to yellow
#               also fixed the bad board check that was actually checking
#               fan status
# version 2.12- "Re-fixed" the bad board check that was actually checking
#               fan status
# version 2.13- works for E480R now
#
# BIG BROTHER / XXXXXXXXXXXXXXXX status
#
# Written by Paul A. Luzzi
# on March 22, 2000
#

########################################
# NOTE
# This has been tested with BB 1.2b, 1.4h2, 1.5, 1.5a
#
# Tested on :
#    Sun Ultra 60, E220R/420R/480R, E250/450, E3000/3500, E4500
########################################

########################################
# INSTALLATION
# step 1 - update the EXT section of the runbb.sh script to include this
# ...
########################################

##################################
# CONFIGURE IT HERE
##################################

##################################
# Start of script
##################################

# The Big Brother environment must already be loaded by the caller.
if test ! "$BBHOME"
then
  echo "template: BBHOME is not set"
  exit 1
fi

if test ! -d "$BBHOME"
then
  echo "template: BBHOME is invalid"
  exit 1
fi

if test ! "$BBTMP"                      # GET DEFINITIONS IF NEEDED
then
  echo "template: The BB environment is not set"
  exit 1
fi

#####
##### Start of custom section added by Paul A. Luzzi
#####

#####
##### Helper: print the non-blank lines of the "System LED Status"
##### section of the prtdiag output.
##### Globals read: PRTDIAG_COMMAND, AWK, GREP
#####
prtdiag_led_section()
{
  $PRTDIAG_COMMAND | $AWK '/System LED Status./, /^$/ {print $0}' | $GREP -v "^$"
}

#####
##### Helper: filter stdin, printing for each line the text that sits
##### between the first "[" and the "]" that follows it.
##### Globals read: AWK
#####
prtdiag_bracket_value()
{
  $AWK -F"[" '{print $2}' | $AWK -F"]" '{print $1}'
}

#####
##### Get Status proc - used to get all responses
#####
##### Purpose is to report back to a central server, all "standard"
##### detailed information about network, disk, volume, cpu, memory
##### which is not reported thru regular "easy" checks.
#####
##### Globals read    : BBHOME, THIS_HOST and the BB tool variables
#####                   (UNAME, GREP, EGREP, AWK, WC, HEAD, TAIL, CUT,
#####                   SED, SORT)
##### Globals written : COLOR (green / yellow / red), exported on return,
#####                   plus the working variables assigned below
##### Output          : the report text, on stdout
#####
get_status()
{
  #####
  ##### Setup any and all variables before beginning.
  #####
  . "$BBHOME/etc/bbsys.local"

  #####
  ##### Setup some variables for use later
  #####
  COLOR="green"

  #####
  ##### Diagnostic Information and more CPU/Memory Info.
  ##### looking for faults or failures here.
  #####
  PLATFORM_SPEC=$($UNAME -m)

  echo "


"
  echo "============== prtdiag -v =============="
  echo "--- (/usr/platform/${PLATFORM_SPEC}/sbin/prtdiag -v) ---"
  echo "
"
  echo "
"

  PRTDIAG_COMMAND="/usr/platform/$PLATFORM_SPEC/sbin/prtdiag -v"
  CATCH_COMMAND=$(/usr/platform/$PLATFORM_SPEC/sbin/prtdiag -v)
  CC_RESULT="$?"
  if [ ! "$CC_RESULT" = "0" ]
  then
    COLOR="red"
    echo "Problem running prtdiag - exit code of $CC_RESULT !!!"
    echo "\n$CATCH_COMMAND"
  fi

  if [ ! "$COLOR" = "red" ]
  then
    #####
    ##### Must be good enough to continue.
    #####
    CATCH_COMMAND2=$($PRTDIAG_COMMAND | $GREP ail | $EGREP -v "No failures|Most recent AC Power Failure")
    ## echo "${CATCH_COMMAND2}..."
    if [ ! -z "$CATCH_COMMAND2" ]
    then
      # A "minimum power available" message is only a warning (yellow);
      # any other "fail" line is an error (red).
      CATCH_MPM=$(echo "$CATCH_COMMAND2" | $GREP -i "System Power Status: Minimum Available")
      if [ ! -z "$CATCH_MPM" ]
      then
        COLOR="yellow"
        echo "\nprtdiag reports some potential power issues !!!"
        echo "$CATCH_COMMAND2 \n"
      else
        COLOR="red"
        echo "prtdiag reports some failures !!!"
        echo "$CATCH_COMMAND2"
      fi
    fi

    CATCH_COMMAND3=$($PRTDIAG_COMMAND | $GREP -i ault | $EGREP -v "No System Faults found|NO_FAULT|LOCATOR FAULT")
    ## echo "${CATCH_COMMAND3}..."
    if [ ! -z "$CATCH_COMMAND3" ]
    then
      COLOR="red"
      echo "prtdiag reports some faults !!!"
      echo "$CATCH_COMMAND3"
    fi

    #####
    ##### Now check for specific components.
    ##### Start with front lights.
    #####
    ##### ENV_CHECK is the $WC result for the non-blank lines that follow
    ##### the "Environmental Status" banner; it selects which family of
    ##### checks runs below (>1: Exx00 style, =1: E250/E450/E480R style).
    #####
    ENV_CHECK=$($PRTDIAG_COMMAND | $AWK '/.Environmental Status./, /^$/ {print $0}' | $GREP -v "^$" | $WC)

    #####
    ##### Sun Blade check - Blade 100 specifically ....
    #####
    BLADE_CHECK=$($PRTDIAG_COMMAND | $AWK '/.Environmental Status./, /^$/ {print $1}' | $GREP -v "^$" | $HEAD -2 | $TAIL -1)
    ## echo "${BLADE_CHECK}____"
    if [ "$BLADE_CHECK" = "Fan" ]
    then
      ENV_CHECK="0"
    fi

    #####
    ##### Netra T1 - AC 200 specifically ....
    #####
    NETRA_CHECK=$($PRTDIAG_COMMAND | $HEAD -1 | $GREP Netra)
    ## echo "${NETRA_CHECK}____"
    if [ ! -z "$NETRA_CHECK" ]
    then
      ENV_CHECK="0"

      # Each lom listing is trimmed with "tail -(lines - 1)", i.e. every
      # line except the first one is examined.
      NETRA_PS_CHECK=$(/usr/sbin/lom -p | tail -$(expr $(/usr/sbin/lom -p | wc -l) - 1 ) | $CUT -c3-4 | $GREP -v "OK")
      if [ ! -z "$NETRA_PS_CHECK" ]
      then
        COLOR="red"
        echo "\n\n-------------------------------------"
        echo "Power Supply(s) are bad on $THIS_HOST"
        /usr/sbin/lom -p
        echo "-------------------------------------\n\n"
      fi

      NETRA_FAN_CHECK=$(/usr/sbin/lom -f | tail -$(expr $(/usr/sbin/lom -f | wc -l) - 1 ) | $CUT -c3-4 | $GREP -v "OK")
      if [ ! -z "$NETRA_FAN_CHECK" ]
      then
        COLOR="red"
        echo "\n\n-------------------------------------"
        echo "Fans are bad on $THIS_HOST"
        /usr/sbin/lom -f
        echo "-------------------------------------\n\n"
      fi

      NETRA_VOLT_CHECK=$(/usr/sbin/lom -v | $AWK '{print $NF}' | $EGREP -v "flags|voltage|breaker" | $GREP -v "status=ok")
      if [ ! -z "$NETRA_VOLT_CHECK" ]
      then
        COLOR="red"
        echo "\n\n-------------------------------------"
        echo "System Voltages are bad on $THIS_HOST"
        /usr/sbin/lom -v | $SED s/faulty/"faulty<\/font>"/g
        echo "-------------------------------------\n\n"
      fi

      # The tail length must come from the same listing that is being
      # trimmed (lom -l).  It used to be computed from "lom -p", so the
      # number of alarm/fault lines examined depended on the number of
      # power supply lines.
      NETRA_FAULT_CHECK=$(/usr/sbin/lom -l | tail -$(expr $(/usr/sbin/lom -l | wc -l) - 1 ) | $GREP -v Alarm3 | $AWK -F"=" '{print $2}' | $GREP -v "off")
      if [ ! -z "$NETRA_FAULT_CHECK" ]
      then
        COLOR="red"
        echo "\n\n-------------------------------------"
        echo "System Faults are lit on $THIS_HOST"
        /usr/sbin/lom -l | $GREP -v Alarm3 | $SED s/on/"on<\/font>"/g
        echo "-------------------------------------\n\n"
      fi
    fi

    #####
    ##### E3500-6500 specific checks
    #####
    MACH_CLASS="Exx00"
    if [ "$ENV_CHECK" -gt "1" ]
    then
      # The last line of the section is expected to contain "Normal".
      BAD_LIGHTS=$($PRTDIAG_COMMAND | $AWK '/.Environmental Status./, /^$/ {print $0}' | $GREP -v "^$" | $TAIL -1 | $GREP -v "Normal")
      ## echo "${BAD_LIGHTS}..."
      if [ ! -z "$BAD_LIGHTS" ]
      then
        COLOR="red"
        echo "prtdiag reports front lights are not normal !!!"
        $PRTDIAG_COMMAND | $AWK '/.Environmental Status./, /^$/ {print $0}' | $GREP -v "^$" | $TAIL -2
      fi
    fi

    #####
    ##### E250/E450/480R specific checks
    #####
    if [ "$ENV_CHECK" -eq "1" ]
    then
      # NOTE: "echo $VAR" is deliberately left unquoted in this section
      # so that the two captured lines are joined into a single line
      # before the bracketed value is extracted.

      E_CLASS=$($PRTDIAG_COMMAND | $HEAD -1 | $GREP "450 ")
      if [ -n "$E_CLASS" ]
      then
        #####
        ##### Individual error LED's on E450
        #####
        MACH_CLASS="E450"
        POWER_ERROR_LIGHT=$(prtdiag_led_section | $HEAD -2 | $CUT -c 21-30)
        POWER_ERROR_LIGHT_STATUS=$(echo $POWER_ERROR_LIGHT | prtdiag_bracket_value)
        GEN_ERROR_LIGHT=$(prtdiag_led_section | $HEAD -2 | $CUT -c 33-46)
        GEN_ERROR_LIGHT_STATUS=$(echo $GEN_ERROR_LIGHT | prtdiag_bracket_value)
        ACT_ERROR_LIGHT=$(prtdiag_led_section | $HEAD -2 | $CUT -c 48-65)
        ACT_ERROR_LIGHT_STATUS=$(echo $ACT_ERROR_LIGHT | prtdiag_bracket_value)
        DISK_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -2 | $CUT -c 21-30)
        DISK_ERROR_LIGHT_STATUS=$(echo $DISK_ERROR_LIGHT | prtdiag_bracket_value)
        THERM_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -2 | $CUT -c 33-46)
        THERM_ERROR_LIGHT_STATUS=$(echo $THERM_ERROR_LIGHT | prtdiag_bracket_value)
        PS_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -2 | $CUT -c 48-65)
        PS_ERROR_LIGHT_STATUS=$(echo $PS_ERROR_LIGHT | prtdiag_bracket_value)
      fi

      E_CLASS=$($PRTDIAG_COMMAND | $HEAD -1 | $GREP "250 ")
      if [ -n "$E_CLASS" ]
      then
        #####
        ##### Individual error LED's on E250
        #####
        MACH_CLASS="E250"
        POWER_ERROR_LIGHT=$(prtdiag_led_section | $HEAD -2 | $CUT -c 37-41)
        POWER_ERROR_LIGHT_STATUS=$(echo $POWER_ERROR_LIGHT | $TAIL -1 | prtdiag_bracket_value)
        GEN_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -2 | $CUT -c 21-33)
        GEN_ERROR_LIGHT_STATUS=$(echo $GEN_ERROR_LIGHT | $TAIL -1 | prtdiag_bracket_value)
        ACT_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -4 | $HEAD -2 | $CUT -c 37-41)
        ACT_ERROR_LIGHT_STATUS=$(echo $ACT_ERROR_LIGHT | $TAIL -1 | prtdiag_bracket_value)
        DISK_ERROR_LIGHT=$(prtdiag_led_section | $HEAD -2 | $CUT -c 21-33)
        DISK_ERROR_LIGHT_STATUS=$(echo $DISK_ERROR_LIGHT | $TAIL -1 | prtdiag_bracket_value)
        THERM_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -2 | $CUT -c 37-41)
        THERM_ERROR_LIGHT_STATUS=$(echo $THERM_ERROR_LIGHT | $TAIL -1 | prtdiag_bracket_value)
        PS_ERROR_LIGHT=$(prtdiag_led_section | $TAIL -4 | $HEAD -2 | $CUT -c 21-33)
        PS_ERROR_LIGHT_STATUS=$(echo $PS_ERROR_LIGHT | $TAIL -1 | prtdiag_bracket_value)
      fi

      E_CLASS=$($PRTDIAG_COMMAND | $HEAD -1 | $GREP "480R")
      if [ -n "$E_CLASS" ]
      then
        #####
        ##### Individual error LED's on E480R
        #####
        MACH_CLASS="E480R"
        POWER_ERROR_LIGHT_STATUS=$($PRTDIAG_COMMAND | $AWK '/LOCATOR./, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$|POWER" | $CUT -c 23-25)
        GEN_ERROR_LIGHT_STATUS=$($PRTDIAG_COMMAND | $AWK '/LOCATOR./, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$|POWER" | $CUT -c 14-16)
        DISK_ERROR_LIGHT=$($PRTDIAG_COMMAND | $AWK '/Disk Status./, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$|Status" | $CUT -c 10-17 | $SORT -u | $GREP -v "NO_FAULT")
        if [ -z "$DISK_ERROR_LIGHT" ]
        then
          DISK_ERROR_LIGHT_STATUS="OFF"
        else
          DISK_ERROR_LIGHT_STATUS=" ON"
        fi
        THERM_ERROR_LIGHT_STATUS="OFF"   # there is no light for this on 480R
        PS_ERROR_LIGHT_STATUS="OFF"      # there is no light for this on 480R
      fi

      #####
      ##### Individual error LED's checks
      #####
      if [ ! "$POWER_ERROR_LIGHT_STATUS" = " ON" ]
      then
        COLOR="red"
        echo "prtdiag reports power light is not on !?! ($POWER_ERROR_LIGHT_STATUS)"
        echo "$POWER_ERROR_LIGHT"
      fi
      if [ ! "$GEN_ERROR_LIGHT_STATUS" = "OFF" ]
      then
        COLOR="red"
        echo "prtdiag reports general error light is not off !!! ($GEN_ERROR_LIGHT_STATUS)"
        echo "$GEN_ERROR_LIGHT"
      fi
      if [ ! "$DISK_ERROR_LIGHT_STATUS" = "OFF" ]
      then
        COLOR="red"
        echo "prtdiag reports disk error conditions are set on !!! ($DISK_ERROR_LIGHT_STATUS)"
        echo "$DISK_ERROR_LIGHT"
      fi
      if [ ! "$THERM_ERROR_LIGHT_STATUS" = "OFF" ]
      then
        COLOR="red"
        echo "prtdiag reports thermal error light is on !!!"
        echo "$THERM_ERROR_LIGHT"
      fi
      if [ ! "$PS_ERROR_LIGHT_STATUS" = "OFF" ]
      then
        COLOR="red"
        echo "prtdiag reports power supply error light is on !!!"
        echo "$PS_ERROR_LIGHT"
      fi

      #####
      ##### Individual disk LED's
      #####
      if [ "$MACH_CLASS" = "E480R" ]
      then
        # Match the literal text NO_FAULT.  The previous pattern
        # /[NO_FAULT]/ was a bracket expression (any one of the letters
        # N,O,_,F,A,U,L,T), so a status such as FAULT also matched and
        # faulty disks were never listed.
        DISK_LIGHTS=$($PRTDIAG_COMMAND | $AWK '/Disk Status./, /^$/ {print $0}' | $EGREP -v "^$|\-\-\-|Status" | $AWK '$3 !~ /NO_FAULT/ {print $1,$2}')
      else
        DISK_LIGHTS=$($PRTDIAG_COMMAND | $AWK '/Disk LED Status./, /^$/ {print $0}' | $EGREP -v "^$|===" | $TAIL +2 | $GREP "ERROR")
      fi

      #####
      ##### Individual disk LED's checks
      #####
      if [ ! -z "$DISK_LIGHTS" ]
      then
        COLOR="red"
        echo "prtdiag reports some of the disks have problems !!!"
        echo "$DISK_LIGHTS"
      fi
    fi

    #####
    ##### Now check for disabled boards.
    ##### Skipped while MACH_CLASS still holds its default value "Exx00".
    #####
    BOARD_CHECK=$($PRTDIAG_COMMAND | $GREP "Detached Boards")
    if [ ! "$MACH_CLASS" = "Exx00" ]
    then
      if [ ! -z "$BOARD_CHECK" ]
      then
        BAD_BOARDS=$($PRTDIAG_COMMAND | $AWK '/Detached Boards/, /^$/ {print $0}' | $GREP "disabled")
        ## echo "${BAD_BOARDS}..."
        if [ ! -z "$BAD_BOARDS" ]
        then
          COLOR="red"
          echo "prtdiag reports disabled or problematic boards !!!"
          echo "$BAD_BOARDS"
        fi
      fi
    fi

    #####
    ##### E480R now allows for IO card condition check.  Added 09-26-02
    #####
    if [ "$MACH_CLASS" = "E480R" ]
    then
      BAD_BOARDS=$($PRTDIAG_COMMAND | $AWK '/Type ID Side Slot/, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$|Freq|Status" | $AWK '$8 !~ /ok/ {print $0}')
      ## echo "${BAD_BOARDS}..."
      if [ ! -z "$BAD_BOARDS" ]
      then
        COLOR="red"
        echo "prtdiag reports disabled or problematic boards/IO cards !!!"
        echo "$BAD_BOARDS"
      fi
    fi

    #####
    ##### Now check for fans.  Updated 09-26-02 for E480R
    #####
    FAN_CHECK=$($PRTDIAG_COMMAND | $EGREP "Fans:|Fan Status:")
    if [ ! -z "$FAN_CHECK" ]
    then
      if [ "$MACH_CLASS" = "E480R" ]
      then
        BAD_FANS=$($PRTDIAG_COMMAND | $AWK '/RPM/, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$|Status" | $GREP -v "NO_FAULT")
      else
        BAD_FANS=$($PRTDIAG_COMMAND | $AWK '/Fans:/, /^$/ {print $0}' | $GREP -v "^$" | $TAIL +5 | $GREP -v "OK")
      fi
      ## echo "${BAD_FANS}..."
      if [ ! -z "$BAD_FANS" ]
      then
        COLOR="red"
        echo "prtdiag reports cooling fans are not ok !!!"
        echo "$BAD_FANS"
      fi
    fi

    #####
    ##### Now check for Board Temps.
    #####
    TEMP_CHECK=$($PRTDIAG_COMMAND | $GREP "System Temperatures")
    if [ ! -z "$TEMP_CHECK" ]
    then
      if [ "$MACH_CLASS" = "E480R" ]
      then
        BAD_TEMP=$($PRTDIAG_COMMAND | $AWK '/Temperature Status/, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$" | $GREP -v "OK")
      else
        # NOTE(review): in an awk extended regular expression the
        # unescaped parentheses below are grouping operators, so this
        # pattern looks for "System Temperatures Celsius:" and probably
        # never matches a literal "(Celsius)" header.  Left unchanged
        # pending confirmation against real prtdiag output on each
        # supported platform, because enabling it changes what is
        # reported.
        BAD_TEMP=$($PRTDIAG_COMMAND | $AWK '/System Temperatures (Celsius):/, /^$/ {print $0}' | $GREP -v "^$" | $TAIL +5 | $GREP -v "OK")
      fi
      ## echo "${BAD_TEMP}..."
      if [ ! -z "$BAD_TEMP" ]
      then
        COLOR="red"
        echo "prtdiag reports board temperatures are not stable !!!"
        echo "$BAD_TEMP"
      fi
    fi

    #####
    ##### Now check for Power Supplies
    #####
    PS_CHECK=$($PRTDIAG_COMMAND | $GREP "Power Supplies")
    if [ ! -z "$PS_CHECK" ]
    then
      if [ "$MACH_CLASS" = "E480R" ]
      then
        BAD_PS=$($PRTDIAG_COMMAND | $AWK '/^Supply/, /^$/ {print $0}' | $EGREP -v "\-\-\-|^$|Status|NO_FAULT")
      else
        BAD_PS=$($PRTDIAG_COMMAND | $AWK '/Power Supplies:/, /^$/ {print $0}' | $GREP -v "^$" | $TAIL +5 | $GREP -v "OK")
      fi
      ## echo "${BAD_PS}..."
      if [ ! -z "$BAD_PS" ]
      then
        COLOR="red"
        echo "prtdiag reports power supply problems !!!"
        echo "$BAD_PS"
      fi
    fi
  fi

  if [ ! "$COLOR" = "red" ]
  then
    #####
    ##### If we get here, assume all is ok and just show prtdiag output
    #####
    echo "${CATCH_COMMAND}..."
  fi

  echo "
"

  #####
  ##### Make sure to export COLOR so that it gets back to "central"
  #####
  export COLOR

#####
##### End of get_status proc
#####
}

#####
##### Main body
#####
##### For every line of $BBHOSTS that mentions both this host and
##### "prtdiag", build the report in a scratch file and send it to the
##### BB display server with the colour computed by get_status.
#####
##### NOTE(review): the scratch file lives in /tmp under a predictable
##### name although $BBTMP is validated above; confirm whether it should
##### move to $BBTMP.
#####
touch /tmp/$THIS_HOST.prtdiag

$GREP $THIS_HOST $BBHOSTS | $GREP "prtdiag" | while read line
do
  if [ ! -z "$line" ]
  then
    echo "

" > /tmp/$THIS_HOST.prtdiag
    echo "Output of prtdiag on $THIS_HOST " >> /tmp/$THIS_HOST.prtdiag
    echo "Captured : $($DATE) " >> /tmp/$THIS_HOST.prtdiag
    # get_status runs in this shell (only its stdout is redirected), so
    # the COLOR it sets is visible to the $BB call below.
    get_status >> /tmp/$THIS_HOST.prtdiag
    $BB $BBDISP "status $THIS_HOST.prtdiag $COLOR $($DATE) $($CAT /tmp/$THIS_HOST.prtdiag) "
  else
    COLOR="clear"
    $BB $BBDISP "status $THIS_HOST.prtdiag $COLOR $($DATE) $($CAT /tmp/$THIS_HOST.prtdiag) "
  fi
done

#####
##### End of custom section added by Paul A. Luzzi
#####

##############################################
# end of script
##############################################