#!/bin/bash
# =============================================================================
#  qa_summary  --  one-glance server hardware health card
# =============================================================================

# Turn command tracing off and send everything written to stderr to /dev/null
# for the rest of the script.
set +x
exec 2>/dev/null

# =============================================================================
#  CONFIGURATION -- EDIT THESE BEFORE EVERYTHING ELSE
# =============================================================================

# URL of the firmware manifest that is downloaded and parsed into the
# BIOS_LATEST / IPMI_LATEST / RAID_LATEST tables.
FW_MANIFEST_URL="http://216.104.40.250/scripts/fw_versions"

# SATA/SAS drives: FAIL when the summed SMART error counters reach this value.
DRIVE_ERR_FAIL_THRESHOLD=75
# SATA/SAS drives: FAIL when Media_Wearout_Indicator is at or below this value.
DRIVE_WEAR_FAIL_THRESHOLD=25
# NVMe drives: FAIL when percentage_used is at or above this value.
NVME_PCT_USED_FAIL=90
# NVMe drives: WARN when available_spare is below this value.
NVME_SPARE_WARN=10

# Inner width of the report boxes, in characters.
W=90


# =============================================================================
#  FIRMWARE MANIFEST
# =============================================================================
# Latest known firmware versions, filled from the BIOS / IPMI / RAID sections
# of the downloaded manifest and looked up by the manifest's KEY.
declare -A BIOS_LATEST=()
declare -A IPMI_LATEST=()
declare -A RAID_LATEST=()

#######################################
# Parse manifest text into the *_LATEST tables.
# Format: "SECTION=<BIOS|IPMI|RAID>" lines switch the active section,
# "KEY=VALUE" lines add an entry to it. Anything after '#' is dropped.
# Whitespace around the line, the key and the value is ignored, lines
# without '=' and lines with an empty key are skipped, and entries under
# any other section name are ignored.
# Globals:   BIOS_LATEST, IPMI_LATEST, RAID_LATEST (written)
# Arguments: $1 - raw manifest text
#######################################
_fw_manifest_parse() {
    local raw="$1" section="" line key val
    while IFS= read -r line; do
        line="${line%%#*}"
        # Trim leading/trailing whitespace (also removes a trailing CR).
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"
        [ -z "$line" ] && continue
        if [[ "$line" == SECTION=* ]]; then
            section="${line#SECTION=}"
            section="${section#"${section%%[![:space:]]*}"}"
            continue
        fi
        [[ "$line" != *=* ]] && continue
        key="${line%%=*}"
        val="${line#*=}"
        key="${key%"${key##*[![:space:]]}"}"
        val="${val#"${val%%[![:space:]]*}"}"
        # An empty subscript is invalid for an associative array.
        [ -z "$key" ] && continue
        case "$section" in
            BIOS) BIOS_LATEST["$key"]="$val" ;;
            IPMI) IPMI_LATEST["$key"]="$val" ;;
            RAID) RAID_LATEST["$key"]="$val" ;;
        esac
    done <<< "$raw"
}

# --tries=2 bounds the wait: wget's default of 20 retries with a 10 s timeout
# each can stall the whole report when the manifest host is unreachable.
_FW_RAW=$(wget -q -O- --timeout=10 --tries=2 "$FW_MANIFEST_URL" 2>/dev/null)
if [ -n "$_FW_RAW" ]; then
    _fw_manifest_parse "$_FW_RAW"
fi
unset _FW_RAW _section _line _key _val

# ANSI SGR escape sequences used to color the report.
R=$'\033[1;31m'      # bold red
G=$'\033[1;32m'      # bold green
Y=$'\033[1;33m'      # bold yellow
C=$'\033[1;36m'      # bold cyan (default box color)
M=$'\033[1;35m'      # bold magenta
DIM=$'\033[0;37m'    # normal-weight white
RST=$'\033[0m'       # reset all attributes
BOLD=$'\033[1m'      # bold
BGG=$'\033[42m'      # green background
BGR=$'\033[41m'      # red background
BGY=$'\033[43m'      # yellow background
BLK=$'\033[0;30m'    # black foreground

# rep STR N -- print STR repeated N times with no trailing newline
# (prints nothing when N is zero or negative).
rep() {
    local out='' left
    for (( left = $2; left > 0; left-- )); do
        out+="$1"
    done
    printf '%s' "$out"
}
# Box-drawing glyphs, emitted as raw UTF-8 bytes with no trailing newline.
_TL() { printf '%b' '\xe2\x95\x94'; }   # double-line top-left corner
_EQ() { printf '%b' '\xe2\x95\x90'; }   # double-line horizontal
_TR() { printf '%b' '\xe2\x95\x97'; }   # double-line top-right corner
_ML() { printf '%b' '\xe2\x95\xa0'; }   # double-line left tee
_MR() { printf '%b' '\xe2\x95\xa3'; }   # double-line right tee
_VB() { printf '%b' '\xe2\x95\x91'; }   # double-line vertical
_BL() { printf '%b' '\xe2\x95\x9a'; }   # double-line bottom-left corner
_BR() { printf '%b' '\xe2\x95\x9d'; }   # double-line bottom-right corner
_HD() { printf '%b' '\xe2\x94\x80'; }   # light horizontal (divider)
# hline -- print a horizontal rule of W double-line glyphs (no newline).
hline() {
    local bar
    bar=$(_EQ)
    rep "$bar" $W
}

# box_open TITLE [COLOR]
# Print a blank line, the top border, a row with TITLE centered in bold
# yellow, and a separator rule. COLOR (default: $C) is used for the frame.
box_open() {
    local title="$1" color="${2:-$C}"
    local inner=$(( W - ${#title} - 2 ))
    local left=$(( inner / 2 ))
    local right=$(( inner - left ))
    local rule wall
    rule=$(hline)
    wall=$(_VB)

    printf '\n'
    printf "%b%s%s%s%b\n" "$color" "$(_TL)" "$rule" "$(_TR)" "$RST"
    printf "%b%s%s%b%b%b %s %b%b%s%s%b\n" \
        "$color" "$wall" "$(rep ' ' $left)" "$RST" \
        "$BOLD" "$Y" "$title" "$RST" \
        "$color" "$(rep ' ' $right)" "$wall" "$RST"
    printf "%b%s%s%s%b\n" "$color" "$(_ML)" "$rule" "$(_MR)" "$RST"
}

# box_close [COLOR] -- print the bottom border of a box (default color: $C).
box_close() {
    local color="${1:-$C}"
    printf "%b%s%s%s%b\n" "$color" "$(_BL)" "$(hline)" "$(_BR)" "$RST"
}

# bline TEXT [COLOR]
# Print one box row: frame glyph, TEXT, right padding up to W visible
# characters, frame glyph. ANSI escape sequences in TEXT are not counted
# toward its width; TEXT wider than W is printed without padding.
bline() {
    local text="$1" bc="${2:-$C}"
    local plain fill edge
    plain=$(printf '%s' "$text" | sed -E 's/\x1b\[[0-9;:]*[mGKHFJA-Za-z]//g')
    fill=$(( W - ${#plain} ))
    if [ $fill -lt 0 ]; then
        fill=0
    fi
    edge=$(_VB)
    printf "%b%s%b%s%*s%b%s%b\n" "$bc" "$edge" "$RST" \
        "$text" "$fill" "" "$bc" "$edge" "$RST"
}

# bdiv -- print a dim light-line divider row inside the current box.
bdiv() {
    local rule
    rule=$(rep "$(_HD)" $((W-2)))
    bline "$(printf "  %b%s%b" "$DIM" "$rule" "$RST")"
}

# Running totals of verdicts (incremented by verdict_row; collect_drive also
# bumps FAILS for failing drives).
FAILS=0; WARNS=0; PASSES=0
# Summary entries, each stored as "label|detail".
FAIL_ITEMS=()
WARN_ITEMS=()

# Verdict badges: a fixed 6-character label wrapped in color codes, printed
# without a trailing newline.
badge_pass() { printf '%b PASS %b' "${BGG}${BLK}${BOLD}" "$RST"; }
badge_fail() { printf '%b FAIL %b' "${BGR}${BOLD}"       "$RST"; }
badge_warn() { printf '%b WARN %b' "${BGY}${BLK}${BOLD}" "$RST"; }
badge_info() { printf '%b INFO %b' "${DIM}${BLK}${BOLD}" "$RST"; }
badge_skip() { printf '%b SKIP %b' "${DIM}"              "$RST"; }

# verdict_row LABEL DETAIL VERDICT [EXTRA]
# Print one report row: bold LABEL (left-aligned in 10 columns), DETAIL in the
# verdict's color, optional dim EXTRA text, and a badge pushed toward the
# right edge. VERDICT is PASS, FAIL, WARN, INFO, SKIP or NONE; PASS/FAIL/WARN
# also increment PASSES/FAILS/WARNS. NONE prints no badge.
verdict_row() {
    local label="$1" detail="$2" verdict="$3" extra="${4:-}"
    local badge lc left vis_len pad
    local extra_part=""

    case "$verdict" in
        PASS) PASSES=$(( PASSES+1 )); lc="$G";   badge=$(badge_pass) ;;
        FAIL) FAILS=$(( FAILS+1 ));   lc="$R";   badge=$(badge_fail) ;;
        WARN) WARNS=$(( WARNS+1 ));   lc="$Y";   badge=$(badge_warn) ;;
        INFO) lc="$DIM"; badge=$(badge_info) ;;
        SKIP) lc="$DIM"; badge=$(badge_skip) ;;
        NONE) lc="$DIM"; badge="" ;;
    esac

    if [ -n "$extra" ]; then
        extra_part="$(printf "  %b%s%b" "$DIM" "$extra" "$RST")"
    fi

    # The left-hand text is the same whether or not a badge follows.
    left=$(printf "  %b%-10s%b %b%s%b%s" \
        "$BOLD" "$label" "$RST" \
        "$lc" "$detail" "$RST" \
        "$extra_part")

    if [ -z "$badge" ]; then
        bline "$left"
        return
    fi

    # Visible width excludes ANSI escapes; 6 columns are reserved for the badge.
    vis_len=$(printf '%s' "$left" | sed -E 's/\x1b\[[0-9;:]*[mGKHFJA-Za-z]//g' | awk '{print length}')
    pad=$(( W - vis_len - 6 ))
    [ $pad -lt 1 ] && pad=1
    bline "${left}$(printf "%${pad}s" "")${badge}"
}

# cont_row TEXT -- print TEXT as an indented continuation row inside the box,
# followed by a color reset.
cont_row() {
    local body
    body=$(printf "  %s%b" "$1" "$RST")
    bline "$body"
}

# fw_normalize VERSION
# Print the first three dot-separated components of VERSION, each converted to
# a number and zero-padded to two digits ("3.4" -> "03.04.00"). Non-numeric
# suffixes are dropped by awk's numeric conversion. No trailing newline.
fw_normalize() {
    printf '%s\n' "$1" | awk -F. '{
        major = $1 + 0
        minor = $2 + 0
        patch = $3 + 0
        printf "%02d.%02d.%02d", major, minor, patch
    }'
}

# Node name of this machine, as reported by uname -n.
HOSTNAME=$(uname -n)

# =============================================================================
#  TIMEZONE AUTO-DETECTION via traceroute hop hostnames
# =============================================================================
# When traceroute is not on PATH, try to install it: refresh apt and install
# the traceroute package, then try inetutils-traceroute, then yum. The attempt
# runs in a subshell with all output discarded, so a failed install is silent.
command -v traceroute >/dev/null 2>&1 || \
    (export DEBIAN_FRONTEND=noninteractive; apt-get update -qq 2>/dev/null && \
     apt-get install -y traceroute 2>/dev/null || \
     apt-get install -y inetutils-traceroute 2>/dev/null || \
     yum install -y traceroute 2>/dev/null) >/dev/null 2>&1

#######################################
# Guess the datacenter from router hostnames.
# Reads one hostname per line on stdin, maps each to "Name|Timezone" by the
# airport/city code it contains, and prints the most frequent mapping (or
# nothing when no hostname matches).
# The leading count added by `uniq -c` is stripped with sed rather than by
# taking awk's second field, because location names contain spaces
# ("Dallas/Fort Worth", "New York", ...) and would otherwise be cut at the
# first space, losing the "|Timezone" part.
#######################################
_dc_guess_from_hops() {
    local _h
    while IFS= read -r _h; do
        _h="${_h,,}"
        case "$_h" in
            *dfw*|*dal*|*ftw*)          printf 'Dallas/Fort Worth|America/Chicago\n'  ;;
            *ord*|*chi*)                printf 'Chicago|America/Chicago\n'            ;;
            *lax*)                      printf 'Los Angeles|America/Los_Angeles\n'    ;;
            *sjc*|*sfo*|*oak*|*scl*|*svl*|*snv*) printf 'Santa Clara|America/Los_Angeles\n' ;;
            *sea*)                      printf 'Seattle|America/Los_Angeles\n'        ;;
            *ewr*|*nwk*|*njr*)         printf 'New Jersey|America/New_York\n'        ;;
            *nyc*|*jfk*)               printf 'New York|America/New_York\n'          ;;
            *atl*)                      printf 'Atlanta|America/New_York\n'           ;;
            *mia*)                      printf 'Miami|America/New_York\n'             ;;
            *iad*|*dca*|*bos*)         printf 'Washington DC|America/New_York\n'     ;;
            *phx*)                      printf 'Phoenix|America/Phoenix\n'            ;;
            *den*)                      printf 'Denver|America/Denver\n'              ;;
            *ams*)                      printf 'Amsterdam|Europe/Amsterdam\n'         ;;
            *lon*|*lhr*|*ldn*)         printf 'London|Europe/London\n'               ;;
            *fra*)                      printf 'Frankfurt|Europe/Berlin\n'            ;;
            *sin*)                      printf 'Singapore|Asia/Singapore\n'           ;;
            *nrt*|*tok*)               printf 'Tokyo|Asia/Tokyo\n'                   ;;
        esac
    done | sort | uniq -c | sort -rn | head -1 | sed -E 's/^[[:space:]]*[0-9]+[[:space:]]+//'
}

# Hop hostnames come from column 2 of traceroute's output; the header line and
# the first hop are skipped (NR>2) and unanswered hops ("*") are dropped.
_DCINFO=$(traceroute -m 10 -q 1 8.8.8.8 2>/dev/null | awk 'NR>2 {print $2}' | grep -v '^\*$' | _dc_guess_from_hops)

#######################################
# Apply the detected datacenter timezone and report what happened.
# Globals:   _DC_NAME, _DC_TZ, _CURRENT_TZ (written)
# Arguments: $1 - "Name|Timezone" as detected, or empty when unknown
# Outputs:   status lines on stdout
# "Timezone updated" is printed only when timedatectl actually succeeded;
# a failed change is reported as such instead of being claimed as done.
#######################################
_dc_apply_timezone() {
    local dcinfo="$1"
    if [ -z "$dcinfo" ]; then
        _DC_NAME="Unknown"
        _DC_TZ=""
        echo "DC Location: could not detect -- timezone unchanged"
        return 0
    fi

    _DC_NAME="${dcinfo%%|*}"
    _DC_TZ="${dcinfo##*|}"
    # Current zone from timedatectl, falling back to /etc/timezone.
    _CURRENT_TZ=$(timedatectl show --property=Timezone --value 2>/dev/null || cat /etc/timezone 2>/dev/null | tr -d '[:space:]')
    echo "DC Location: ${_DC_NAME}  |  Timezone: ${_DC_TZ}"
    if [ "$_CURRENT_TZ" = "$_DC_TZ" ]; then
        echo "Timezone already correct: ${_DC_TZ}"
    elif timedatectl set-timezone "$_DC_TZ" 2>/dev/null; then
        echo "Timezone updated: ${_CURRENT_TZ} -> ${_DC_TZ}"
    else
        echo "Timezone update FAILED: ${_CURRENT_TZ} -> ${_DC_TZ}"
    fi
}

_dc_apply_timezone "$_DCINFO"

# Report timestamp: date, 12-hour time with AM/PM, and timezone abbreviation.
RUN_TIME=$(date '+%Y-%m-%d %I:%M:%S %p %Z')
# OS name: quoted value of the first PRETTY_NAME line found in /etc/*release.
OS_NAME=$(grep PRETTY_NAME /etc/*release 2>/dev/null | head -1 | cut -d'"' -f2)
# Baseboard (DMI type 2) manufacturer, and the last whitespace-separated word
# of its Product Name line.
BOARD_MFG=$(dmidecode -t 2 2>/dev/null | grep 'Manufacturer' | head -1 | cut -d: -f2 | xargs)
BOARD_RAW=$(dmidecode -t 2 2>/dev/null | grep 'Product Name' | head -1 | awk '{print $NF}')

# Get IPMI IP first for exclusion
IPMI_IP_TEMP=$(ipmicfg -summary 2>/dev/null | grep "IPv4 Address" | cut -d: -f2 | xargs)

# Collect PUBLIC IPs only (exclude RFC1918 private ranges 10.x, 172.16-31.x, 192.168.x)
# Only from interfaces named eth*/bond*/eno*/enp*/ens*/em* followed by a digit.
# Loopback and link-local addresses are excluded as well; the IPMI IP is
# removed in the next step.
ALL_IPS=$(ip -4 -o addr show 2>/dev/null | awk '
    $3=="inet" {
        iface=$2
        sub(/@.*/, "", iface)
        if (iface ~ /^(eth|bond|eno|enp|ens|em)[0-9]/) {
            ip=$4
            sub(/\/.*/, "", ip)
            # Exclude private RFC1918, loopback, link-local
            if (ip ~ /^127\./) next
            if (ip ~ /^169\.254\./) next
            if (ip ~ /^10\./) next
            if (ip ~ /^192\.168\./) next
            # 172.16.0.0 - 172.31.255.255
            split(ip, oct, ".")
            if (oct[1]=="172" && oct[2]+0 >= 16 && oct[2]+0 <= 31) next
            print ip
        }
    }
')

# Remove IPMI IP if present
if [ -n "$IPMI_IP_TEMP" ]; then
    ALL_IPS=$(echo "$ALL_IPS" | grep -v "^${IPMI_IP_TEMP}$")
fi

# Deduplicate (keeping first-seen order), drop empties
ALL_IPS=$(echo "$ALL_IPS" | awk 'NF && !seen[$0]++')

# PRIMARY_IP is the first remaining address; IP_COUNT is how many remain.
if [ -z "$ALL_IPS" ]; then
    PRIMARY_IP=""
    IP_COUNT=0
else
    PRIMARY_IP=$(echo "$ALL_IPS" | head -1)
    IP_COUNT=$(echo "$ALL_IPS" | wc -l)
fi

# Display string: "<ip>  (<n> IP)" or "<ip>  (<n> IPs)" when more than one.
if [ -z "$PRIMARY_IP" ]; then
    IP_DISPLAY="(no public IP)"
else
    IP_DISPLAY="${PRIMARY_IP}  (${IP_COUNT} IP$([ "$IP_COUNT" -gt 1 ] && echo 's' || echo ''))"
fi

# Find the first bond/eth/eno/enp/ens/em interface whose address starts with
# "PRIMARY_IP/" and read its MAC address from sysfs. ETH0_MAC and MAC_IFACE
# stay empty when no interface matches or the sysfs file is unreadable.
primary_iface=$(ip -4 -o addr show 2>/dev/null | awk -v ip="$PRIMARY_IP" '$3=="inet" && index($4, ip"/")==1 && $2~/^(bond|eth|eno|enp|ens|em)/ {gsub(/@.*/,"",$2); print $2; exit}')
ETH0_MAC=""
MAC_IFACE=""
if [ -n "$primary_iface" ] && [ -r "/sys/class/net/${primary_iface}/address" ]; then
    ETH0_MAC=$(cat "/sys/class/net/${primary_iface}/address" 2>/dev/null)
    MAC_IFACE="$primary_iface"
fi

# CPU model from the first "model name" line of /proc/cpuinfo, shortened by
# removing "(R)", "(TM)", "CPU ", the " @ x.xxGHz" suffix and repeated spaces.
CPU_NAME=$(grep "model name" /proc/cpuinfo | head -1 | cut -d: -f2 | xargs | sed -E 's/\(R\)//g;s/\(TM\)//g;s/CPU //;s/ @ [0-9.]+GHz//;s/  +/ /g')
# Socket count = number of distinct "physical id" lines.
CPU_COUNT=$(grep "physical id" /proc/cpuinfo | sort -u | wc -l)
# Thread count = number of "model name" lines (one per logical CPU entry).
CPU_THREADS=$(grep -c "model name" /proc/cpuinfo)
CPU_DETAIL="${CPU_COUNT}x ${CPU_NAME} (${CPU_THREADS}t)"

# DIMM inventory from DMI type 17 (Memory Device) records.
#   DIMM_LINES  - one "size|type|speed|manufacturer|part" line per record whose
#                 Size is non-empty and does not contain "No Module".
#                 Speed falls back to the Configured Memory/Clock Speed line
#                 when the Speed field is empty or contains "Unknown".
#   DIMM_COUNT  - number of such records.
#   DIMM_GROUPS - identical lines collapsed to "count|size|type|speed|manufacturer|part".
#   DIMM_UNIQUE - number of distinct DIMM configurations (non-empty group lines).
DMI17=$(dmidecode -t 17 2>/dev/null)
DIMM_LINES=$(printf '%s' "$DMI17" | awk '
    function flush() { if(size!=""&&size!~/No Module/) print size"|"type"|"speed"|"mfg"|"part; size="";type="";speed="";mfg="";part="" }
    BEGIN{size="";type="";speed="";mfg="";part=""}
    /^Memory Device$/                        {flush()}
    /^[ \t]*Size:/                           {sub(/^[ \t]*Size:[ \t]*/,"");size=$0}
    /^[ \t]*Type:/&&!/Type Detail/           {sub(/^[ \t]*Type:[ \t]*/,"");type=$0}
    /^[ \t]*Speed:/&&!/Configured/           {sub(/^[ \t]*Speed:[ \t]*/,"");speed=$0}
    /^[ \t]*Configured Memory Speed:/        {if(speed==""||speed~/Unknown/){sub(/^[ \t]*Configured Memory Speed:[ \t]*/,"");speed=$0}}
    /^[ \t]*Configured Clock Speed:/         {if(speed==""||speed~/Unknown/){sub(/^[ \t]*Configured Clock Speed:[ \t]*/,"");speed=$0}}
    /^[ \t]*Manufacturer:/                   {sub(/^[ \t]*Manufacturer:[ \t]*/,"");mfg=$0}
    /^[ \t]*Part Number:/                    {sub(/^[ \t]*Part Number:[ \t]*/,"");sub(/[ \t]+$/,"");part=$0}
    END{flush()}
')
DIMM_COUNT=$(printf '%s\n' "$DIMM_LINES" | grep -cv '^$')
DIMM_GROUPS=$(printf '%s\n' "$DIMM_LINES" | grep -v '^$' | sort | uniq -c | awk '{count=$1;$1="";sub(/^ /,"");print count"|"$0}')
DIMM_UNIQUE=$(printf '%s\n' "$DIMM_GROUPS" | grep -c .)

# Memory feature flags, each "yes" or "no".
HAS_ECC="no"; HAS_REG="no"; HAS_LRDIMM="no"
# ECC: DMI type 16 (Physical Memory Array) mentions ECC / Correcting / Multi-bit ...
dmidecode -t 16 2>/dev/null | grep -qiE "ECC|Correcting|Multi-bit" && HAS_ECC="yes"
# ... or, failing that, the first Total Width in the type 17 data exceeds the
# first Data Width (extra bits beyond the data width).
if [ "$HAS_ECC" = "no" ]; then
    _tw=$(printf '%s' "$DMI17" | grep -m1 "Total Width:" | grep -oE '[0-9]+')
    _dw=$(printf '%s' "$DMI17" | grep -m1 "Data Width:" | grep -oE '[0-9]+')
    if [ -n "$_tw" ] && [ -n "$_dw" ] && [ "$_tw" -gt "$_dw" ] 2>/dev/null; then
        HAS_ECC="yes"
    fi
fi
# Registered / load-reduced: taken from any "Type Detail:" line.
printf '%s' "$DMI17" | grep -i "Type Detail:" | grep -qiE "Registered|Buffered" && HAS_REG="yes"
printf '%s' "$DMI17" | grep -i "Type Detail:" | grep -qi "LRDIMM" && HAS_LRDIMM="yes"
# Fallback: detect RDIMM/LRDIMM from part number prefix when Type Detail is vague
# Samsung:  M393 = RDIMM, M386 = LRDIMM (load-reduced)
# Micron:   MTA18 = RDIMM, MTA36 = LRDIMM
# SK Hynix: HMA84/HMA82 = RDIMM, HMAA8 = LRDIMM
# Crucial:  36ASF = RDIMM
# Kingston: KSM = RDIMM
# The fallback only runs for ECC memory with neither flag set so far.
if [ "$HAS_REG" = "no" ] && [ "$HAS_LRDIMM" = "no" ] && [ "$HAS_ECC" = "yes" ]; then
    # Check LRDIMM first (more specific)
    printf '%s' "$DMI17" | grep -i "Part Number:" | grep -qiE "M386|MTA36|HMAA8" && HAS_LRDIMM="yes"
    # Then RDIMM
    if [ "$HAS_LRDIMM" = "no" ]; then
        printf '%s' "$DMI17" | grep -i "Part Number:" | grep -qiE "M393|MTA18|HMA84|HMA82|36ASF|KSM" && HAS_REG="yes"
    fi
fi

# Short tag for display; LRDIMM takes precedence over REG, and the tag stays
# empty when none of the flags is set.
RAM_TYPE_TAG=""
if [ "$HAS_LRDIMM" = "yes" ] && [ "$HAS_ECC" = "yes" ]; then
    RAM_TYPE_TAG="ECC/LRDIMM"
elif [ "$HAS_LRDIMM" = "yes" ]; then
    RAM_TYPE_TAG="LRDIMM"
elif [ "$HAS_REG" = "yes" ] && [ "$HAS_ECC" = "yes" ]; then
    RAM_TYPE_TAG="ECC/REG"
elif [ "$HAS_REG" = "yes" ]; then
    RAM_TYPE_TAG="REG"
elif [ "$HAS_ECC" = "yes" ]; then
    RAM_TYPE_TAG="ECC"
fi

# Build one display line per DIMM group: "<count>x <size> <type> <part> @<speed>".
RAM_DISPLAY_LINES=()
while IFS='|' read -r count size type speed mfg part; do
    [ -z "$count" ] && continue
    # "16 GB" -> "16GB"; drop the " MT/s" / " MHz" unit from the speed.
    size=$(echo "$size" | sed 's/ //g')
    speed=$(echo "$speed" | sed -E 's/ MT\/s//;s/ MHz//')
    # Use the manufacturer when the part number is missing or "Unknown";
    # show nothing if that is "Unknown" too.
    [ -z "$part" ] || [ "$part" = "Unknown" ] && part="$mfg"
    [ "$part" = "Unknown" ] && part=""
    line="${count}x ${size} ${type} ${part} @${speed}"
    RAM_DISPLAY_LINES+=("$line")
done <<< "$DIMM_GROUPS"

# RAM verdict: start from PASS and downgrade on the first matching problem.
#   no DIMMs found                 -> FAIL
#   more than one DIMM variant     -> FAIL
#   neither ECC, REG nor LRDIMM    -> WARN
RAM_VERDICT="PASS"
RAM_FAIL_REASON=""
if [ "$DIMM_COUNT" -eq 0 ]; then
    RAM_VERDICT="FAIL"
    RAM_FAIL_REASON="no DIMMs detected"
elif [ "$DIMM_UNIQUE" -gt 1 ]; then
    RAM_VERDICT="FAIL"
    RAM_FAIL_REASON="mismatched sticks"
else
    case "${HAS_ECC}/${HAS_REG}/${HAS_LRDIMM}" in
        no/no/no)
            RAM_VERDICT="WARN"
            RAM_FAIL_REASON="non-ECC memory"
            ;;
    esac
fi

# Installed versions: BIOS from dmidecode (whitespace removed), IPMI from the
# first x.y or x.y.z number on ipmicfg's "Firmware Revision" line.
CURRENT_BIOS=$(dmidecode -s bios-version 2>/dev/null | tr -d '[:space:]')
CURRENT_IPMI=$(ipmicfg -summary 2>/dev/null | grep -i "Firmware Revision" | grep -oE '[0-9]+\.[0-9]+(\.[0-9]+)?' | head -1)
# Latest versions from the manifest tables, keyed by BOARD_RAW; empty when the
# board has no entry.
LATEST_BIOS="${BIOS_LATEST[${BOARD_RAW}]:-}"
LATEST_IPMI="${IPMI_LATEST[${BOARD_RAW}]:-}"

# BIOS verdict:
#   current version unreadable       -> FAIL
#   no manifest entry for the board  -> WARN
#   current sorts below latest       -> FAIL (outdated)
#   otherwise                        -> PASS
if [ -n "$CURRENT_BIOS" ] && [ -n "$LATEST_BIOS" ]; then
    cn=$(fw_normalize "$CURRENT_BIOS")
    ln=$(fw_normalize "$LATEST_BIOS")
    if [[ "$cn" < "$ln" ]]; then
        BIOS_VERDICT="FAIL"
        BIOS_DETAIL="${CURRENT_BIOS} -> ${LATEST_BIOS}"
        FAIL_ITEMS+=("BIOS|outdated: ${CURRENT_BIOS} -> ${LATEST_BIOS}")
    else
        BIOS_VERDICT="PASS"
        BIOS_DETAIL="${CURRENT_BIOS} (LATEST)"
    fi
elif [ -n "$CURRENT_BIOS" ]; then
    BIOS_VERDICT="WARN"
    BIOS_DETAIL="${CURRENT_BIOS} (latest unknown)"
    WARN_ITEMS+=("BIOS|no manifest entry for ${BOARD_RAW}")
else
    BIOS_VERDICT="FAIL"
    BIOS_DETAIL="unreadable"
    FAIL_ITEMS+=("BIOS|version unreadable")
fi

# IPMI firmware verdict:
#   current version unreadable       -> FAIL
#   no manifest entry for the board  -> WARN
#   current sorts below latest       -> FAIL (outdated)
#   otherwise                        -> PASS
if [ -n "$CURRENT_IPMI" ] && [ -n "$LATEST_IPMI" ]; then
    cn=$(fw_normalize "$CURRENT_IPMI")
    ln=$(fw_normalize "$LATEST_IPMI")
    if [[ "$cn" < "$ln" ]]; then
        IPMI_VERDICT="FAIL"
        IPMI_DETAIL="${CURRENT_IPMI} -> ${LATEST_IPMI}"
        FAIL_ITEMS+=("IPMI FW|Outdated: ${IPMI_DETAIL}")
    else
        IPMI_VERDICT="PASS"
        IPMI_DETAIL="${CURRENT_IPMI} (LATEST)"
    fi
elif [ -n "$CURRENT_IPMI" ]; then
    IPMI_VERDICT="WARN"
    IPMI_DETAIL="${CURRENT_IPMI} (latest unknown)"
    WARN_ITEMS+=("IPMI FW|${IPMI_DETAIL}")
else
    IPMI_VERDICT="FAIL"
    IPMI_DETAIL="unreadable"
    FAIL_ITEMS+=("IPMI FW|version unreadable")
fi

# PCI device listings. LSPCI and _LSPCI_PLAIN both hold the plain `lspci`
# output; _LSPCI_V holds the verbose (-v) output.
LSPCI=$(lspci 2>/dev/null)
# Set to 1 below when a slotted RAID/SAS controller or 10G NIC is found.
HAS_RAID_10G=0
_LSPCI_V=$(lspci -v 2>/dev/null)
_LSPCI_PLAIN=$(lspci 2>/dev/null)

#######################################
# Decide whether a PCI device sits behind a bridge that has a physical slot.
# Method: find the bridge whose secondary bus number equals the device's bus,
# then check that bridge's PCI Express capability for the slot flag.
#
# lspci prints the bridge buses as
#     "Bus: primary=00, secondary=02, subordinate=02, sec-latency=0"
# and the slot flag as "(Slot+)" on the "Express ..." capability line, so
# those are the strings matched here ("Slot Implemented+" is still accepted
# in case a tool prints the flag under its specification name).
#
# Arguments: $1 - device address, "bb:dd.f" or "dddd:bb:dd.f"
# Returns:   0 when the parent bridge reports a slot, 1 otherwise
#######################################
_is_addon_card() {
    local bdf="$1" bus parent_bdf
    # Drop an optional PCI domain prefix ("0000:3b:00.0" -> "3b:00.0").
    case "$bdf" in
        *:*:*) bdf="${bdf#*:}" ;;
    esac
    bus="${bdf%%:*}"
    bus="${bus,,}"
    [ -z "$bus" ] && return 1

    # Parent bridge = the device whose "secondary=" bus equals our bus.
    parent_bdf=$(lspci -vvv 2>/dev/null | awk -v tgt="$bus" '
        /^[0-9a-f]/ { cur=$1 }
        /secondary=/ {
            if (match($0, /secondary=[0-9a-f]+/)) {
                sec = substr($0, RSTART + 10, RLENGTH - 10)
                if (sec == tgt) { print cur; exit }
            }
        }
    ')
    [ -z "$parent_bdf" ] && return 1

    lspci -vvv -s "$parent_bdf" 2>/dev/null | grep -qE 'Slot Implemented\+|\(Slot\+\)' && return 0
    return 1
}

#######################################
# List the devices that report a non-zero "Physical Slot:" number.
# Reads `lspci -v` output on stdin and prints one "bb:dd.f" address per line.
# A device record is closed by a blank line, by the next device header, or by
# end of input. The end-of-input case matters because command substitution
# strips the trailing blank line, so the last device has no terminator.
# A header that does not start with a bb:dd.f address clears the current
# device, so its slot line cannot be credited to the previous one.
#######################################
_slotted_bdfs() {
    awk '
        function flush() { if (has_slot && bdf != "") print bdf; has_slot = 0 }
        /^[0-9a-f]/ {
            flush()
            bdf = ""
            if (match($0, /^([0-9a-f][0-9a-f]:[0-9a-f][0-9a-f]\.[0-9])/))
                bdf = substr($0, RSTART, RLENGTH)
        }
        /Physical Slot:/ { if ($NF + 0 > 0) has_slot = 1 }
        /^$/ { flush() }
        END { flush() }
    '
}

# Also build a set of BDFs that have a Physical Slot entry in lspci -v
_SLOTTED=$(printf '%s\n' "$_LSPCI_V" | _slotted_bdfs)

# Check all RAID and 10G devices -- both slotted AND add-on-verified
# Walks the plain lspci lines that look like a RAID/SAS controller or a 10G
# Ethernet adapter and sets HAS_RAID_10G=1 as soon as one of them is in a
# physical slot.
while IFS= read -r _line; do
    # First field of an lspci line is the device address.
    _bdf=$(printf '%s' "$_line" | awk '{print $1}')
    [ -z "$_bdf" ] && continue

    # Classify the device; 10G requires "Ethernet" plus a known 10G model/keyword.
    _is_raid=0; _is_10g=0
    echo "$_line" | grep -qiE "RAID bus controller|RAID controller|MegaRAID|SAS.*[0-9]{4}|Avago|Broadcom.*SAS|LSI Logic" && _is_raid=1
    echo "$_line" | grep -qiE "Ethernet" && echo "$_line" | grep -qiE \
        "10-Gigabit|10GbE|10GBASE|10G[ -]?SFP|X520|X540|X550|X710|X722|XL710|XXV710|E810|82599|Connect[Xx]-[3-7]|57711|57810|57840|BCM578|Aquantia|Chelsio T[3-6]" \
        && _is_10g=1
    [ "$_is_raid" -eq 0 ] && [ "$_is_10g" -eq 0 ] && continue

    # Accept if it has a physical slot number OR if parent bridge has slot connector
    _in_slot=0
    printf '%s\n' "$_SLOTTED" | grep -q "^${_bdf}$" && _in_slot=1
    [ "$_in_slot" -eq 0 ] && _is_addon_card "$_bdf" && _in_slot=1
    [ "$_in_slot" -eq 1 ] && HAS_RAID_10G=1

# Pre-filter: only lines that can possibly match one of the two classes.
done < <(printf '%s\n' "$_LSPCI_PLAIN" | grep -iE "RAID bus controller|RAID controller|MegaRAID|SAS.*[0-9]{4}|Avago|Broadcom.*SAS|LSI Logic|Ethernet.*10-Gigabit|Ethernet.*X52|Ethernet.*X54|Ethernet.*X71|Ethernet.*E810|Ethernet.*82599|Ethernet.*Connect|Ethernet.*BCM578|Ethernet.*Aquantia|Ethernet.*Chelsio")

# Drop the scan temporaries and the _is_addon_card helper.
unset _SLOTTED _bdf _line _is_raid _is_10g _in_slot _LSPCI_V _LSPCI_PLAIN _is_addon_card

# Detect primary RAID controller -- pick the one with the most physical drives.
# When no controller reports any drives, the first controller that has a
# product name is kept. Results:
#   CTRL_IDX      - storcli controller index
#   CTRL_NAME     - "Product Name" value
#   CTRL_FW       - "FW Package Build" value
#   RAID_PD_COUNT - "Physical Drives" count of the chosen controller
CTRL_IDX=0
CTRL_NAME=""
CTRL_FW=""
RAID_PD_COUNT=0
if command -v storcli >/dev/null 2>&1; then
    _CTRL_TOTAL=$(storcli show 2>/dev/null | grep -oE 'Number of Controllers = [0-9]+' | grep -oE '[0-9]+$')
    [ -z "$_CTRL_TOTAL" ] && _CTRL_TOTAL=1
    for (( _ci = 0; _ci < _CTRL_TOTAL; _ci++ )); do
        # Query the controller once and extract all three fields from that
        # single capture instead of invoking storcli per field.
        _show=$(storcli "/c${_ci}" show 2>/dev/null)
        _cn=$(printf '%s\n' "$_show" | grep 'Product Name' | cut -d'=' -f2 | xargs)
        _cf=$(printf '%s\n' "$_show" | grep 'FW Package Build' | cut -d'=' -f2 | xargs)
        _pd=$(printf '%s\n' "$_show" | grep -i "Physical Drives" | head -1 | awk -F= '{print $2}' | xargs)
        [ -z "$_pd" ] && _pd=0
        if [ "$_pd" -gt "$RAID_PD_COUNT" ] 2>/dev/null; then
            CTRL_IDX=$_ci
            CTRL_NAME="$_cn"
            CTRL_FW="$_cf"
            RAID_PD_COUNT=$_pd
        elif [ -z "$CTRL_NAME" ] && [ -n "$_cn" ]; then
            CTRL_IDX=$_ci
            CTRL_NAME="$_cn"
            CTRL_FW="$_cf"
        fi
    done
    unset _CTRL_TOTAL _ci _cn _cf _pd _show
fi
[ -z "$RAID_PD_COUNT" ] && RAID_PD_COUNT=0

# A controller counts as a real RAID controller only when it has a product
# name and reports at least one physical drive.
IS_REAL_RAID=false
if [ -n "$CTRL_NAME" ] && [ "$RAID_PD_COUNT" -gt 0 ] 2>/dev/null; then
    IS_REAL_RAID=true
fi

# Cache protection check (CacheVault first, then BBU). Both variables stay
# empty when there is no real RAID controller.
BBU_CV_DETAIL=""
BBU_CV_VERDICT=""
if $IS_REAL_RAID; then
    CV_OUT=$(storcli /c${CTRL_IDX}/cv show 2>/dev/null)
    BBU_OUT=$(storcli /c${CTRL_IDX}/bbu show 2>/dev/null)
    # Model and state are the first two columns of the first data row after
    # the "Model  State ..." table header.
    CV_MODEL=$(echo "$CV_OUT" | awk '/Model[[:space:]]+State[[:space:]]/{found=1;next} found && /[A-Za-z0-9]/{print $1;exit}')
    CV_STATE=$(echo "$CV_OUT" | awk '/Model[[:space:]]+State[[:space:]]/{found=1;next} found && /[A-Za-z0-9]/{print $2;exit}')

    # A CacheVault is present when both fields were parsed, the model is not a
    # placeholder, and the output carries no "absent" marker.
    _cv_present=no
    if [ -n "$CV_MODEL" ] && [ -n "$CV_STATE" ]; then
        if ! echo "$CV_MODEL" | grep -qiE "^N/A$|^-$|^No" \
            && ! echo "$CV_OUT" | grep -qiE "No CacheVault|not present|N/A"; then
            _cv_present=yes
        fi
    fi

    if [ "$_cv_present" = yes ]; then
        BBU_CV_DETAIL="CacheVault ${CV_MODEL}  ${CV_STATE}"
        if echo "$CV_STATE" | grep -qi "Optimal"; then
            BBU_CV_VERDICT="PASS"
        elif echo "$CV_STATE" | grep -qiE "Dgd|Degraded"; then
            BBU_CV_VERDICT="FAIL"
            FAIL_ITEMS+=("BBU/CV|CacheVault degraded: ${CV_STATE}")
        else
            BBU_CV_VERDICT="WARN"
            WARN_ITEMS+=("BBU/CV|CacheVault state: ${CV_STATE}")
        fi
    elif echo "$BBU_OUT" | grep -qiE "^iBBU"; then
        BBU_MODEL=$(echo "$BBU_OUT" | grep "^iBBU" | awk '{print $1}')
        BBU_STATE=$(echo "$BBU_OUT" | grep "^iBBU" | awk '{print $2" "$3" "$4}' | sed 's/[()]//g')
        [ -z "$BBU_MODEL" ] && BBU_MODEL="(unknown)"
        [ -z "$BBU_STATE" ] && BBU_STATE="(unknown state)"
        BBU_CV_DETAIL="BBU ${BBU_MODEL}  ${BBU_STATE}"
        if echo "$BBU_STATE" | grep -qi "Optimal\|Fully Charged"; then
            BBU_CV_VERDICT="PASS"
        elif echo "$BBU_STATE" | grep -qiE "Dgd|Degraded"; then
            BBU_CV_VERDICT="FAIL"
            FAIL_ITEMS+=("BBU/CV|BBU degraded: ${BBU_STATE}")
        else
            # Charging, Normal and any other state are all reported as WARN.
            BBU_CV_VERDICT="WARN"
            WARN_ITEMS+=("BBU/CV|BBU state: ${BBU_STATE}")
        fi
    else
        BBU_CV_DETAIL="none detected"
        BBU_CV_VERDICT="WARN"
        WARN_ITEMS+=("BBU/CV|No battery backup on RAID controller")
    fi
    unset _cv_present
fi

# RAID controller verdict:
#   no controller name                   -> SKIP
#   controller without physical drives   -> PASS, shown as HBA
#   manifest entry is "EOL"              -> WARN
#   no manifest entry                    -> WARN (latest unknown)
#   installed firmware sorts below latest-> FAIL, otherwise PASS
if [ -z "$CTRL_NAME" ]; then
    RAID_VERDICT="SKIP"; RAID_DETAIL="no controller"
elif ! $IS_REAL_RAID; then
    RAID_VERDICT="PASS"; RAID_DETAIL="${CTRL_NAME} HBA  FW ${CTRL_FW:-?}"
else
    LATEST_RAID_FW=""
    if [ "${#RAID_LATEST[@]}" -gt 0 ]; then
        # Try each whitespace-separated word of the controller name as a
        # manifest key (intentionally unquoted to split into words).
        for tok in $CTRL_NAME; do
            if [ -n "${RAID_LATEST[$tok]:-}" ]; then
                LATEST_RAID_FW="${RAID_LATEST[$tok]}"; break
            fi
        done
        # Otherwise fall back to the first 4-digit number in the name.
        if [ -z "$LATEST_RAID_FW" ]; then
            mod=$(echo "$CTRL_NAME" | grep -oE '[0-9]{4}' | head -1)
            [ -n "$mod" ] && LATEST_RAID_FW="${RAID_LATEST[$mod]:-}"
        fi
    fi
    if [ "$LATEST_RAID_FW" = "EOL" ]; then
        RAID_VERDICT="WARN"; RAID_DETAIL="${CTRL_NAME} EOL  FW ${CTRL_FW}"
        WARN_ITEMS+=("RAID|Controller EOL: ${CTRL_NAME}")
    elif [ -z "$LATEST_RAID_FW" ]; then
        RAID_VERDICT="WARN"; RAID_DETAIL="${CTRL_NAME}  FW ${CTRL_FW} (latest unknown)"
        WARN_ITEMS+=("RAID|FW version unknown for ${CTRL_NAME}")
    else
        # Turn "a.b.c-d" into four zero-padded 5-digit groups so the two
        # versions can be compared as strings.
        cr=$(echo "$CTRL_FW" | tr '-' '.' | awk -F. '{printf "%05d%05d%05d%05d",$1+0,$2+0,$3+0,$4+0}')
        lr=$(echo "$LATEST_RAID_FW" | tr '-' '.' | awk -F. '{printf "%05d%05d%05d%05d",$1+0,$2+0,$3+0,$4+0}')
        if [[ "$cr" < "$lr" ]]; then
            RAID_VERDICT="FAIL"; RAID_DETAIL="${CTRL_NAME}  ${CTRL_FW} -> ${LATEST_RAID_FW}"
            FAIL_ITEMS+=("RAID|Outdated FW: ${CTRL_FW} -> ${LATEST_RAID_FW}")
        else
            RAID_VERDICT="PASS"; RAID_DETAIL="${CTRL_NAME}  FW ${CTRL_FW} (LATEST)"
        fi
    fi
fi

# Read the current fan mode from `ipmicfg -fan` and normalize it to one of
# "Heavy IO", "Optimal", "Standard" or "Full Speed"; any other text is kept
# as reported.
FAN_RAW=$(ipmicfg -fan 2>/dev/null)
FAN_MODE_RAW=$(printf '%s\n' "$FAN_RAW" | grep -i "Current Fan Speed Mode" | sed 's/.*:[[:space:]]*//' | tr -d '[]' | xargs)
if   echo "$FAN_MODE_RAW" | grep -qi "Heavy IO";  then FAN_MODE="Heavy IO"
elif echo "$FAN_MODE_RAW" | grep -qi "Optimal";   then FAN_MODE="Optimal"
elif echo "$FAN_MODE_RAW" | grep -qi "Standard";  then FAN_MODE="Standard"
elif echo "$FAN_MODE_RAW" | grep -qi "Full";      then FAN_MODE="Full Speed"
else FAN_MODE="$FAN_MODE_RAW"; fi

# Parse supported modes to find the mode numbers dynamically.
# Boards differ: some have Optimal=2, others have PUE2 Optimal=3, etc.
# Look for a line matching N:keyword in the supported modes list.
# Prints the number of the first such line, or nothing when none matches.
_fan_num_for() {
    local keyword="$1"
    printf '%s\n' "$FAN_RAW" | grep -iE "^[[:space:]]*[0-9]+:.*${keyword}" | grep -oE "^[[:space:]]*[0-9]+" | tr -d ' ' | head -1
}
FAN_NUM_OPTIMAL=$(  _fan_num_for "Optimal")
FAN_NUM_HEAVYIO=$(  _fan_num_for "Heavy")
# Fallback to hardcoded values if not found in supported list
[ -z "$FAN_NUM_OPTIMAL" ] && FAN_NUM_OPTIMAL=2
[ -z "$FAN_NUM_HEAVYIO" ] && FAN_NUM_HEAVYIO=4

# Expected mode: Heavy IO when a slotted RAID controller or 10G NIC was found,
# Optimal otherwise.
if [ "$HAS_RAID_10G" -eq 1 ]; then
    EXPECTED_FAN="Heavy IO"
    EXPECTED_FAN_NUM="$FAN_NUM_HEAVYIO"
else
    EXPECTED_FAN="Optimal"
    EXPECTED_FAN_NUM="$FAN_NUM_OPTIMAL"
fi

# Verdict: FAIL when the mode cannot be read, PASS when it matches the
# expected mode, WARN for any other mode.
if [ -z "$FAN_MODE" ]; then
    FAN_VERDICT="FAIL"; FAN_DETAIL="cannot read"
    FAIL_ITEMS+=("Fan|cannot read mode")
elif [ "$EXPECTED_FAN" = "Heavy IO" ] && echo "$FAN_MODE" | grep -qi "Heavy IO"; then
    FAN_VERDICT="PASS"; FAN_DETAIL="${FAN_MODE}"
elif [ "$EXPECTED_FAN" = "Optimal" ] && echo "$FAN_MODE" | grep -qi "Optimal"; then
    FAN_VERDICT="PASS"; FAN_DETAIL="${FAN_MODE}"
else
    FAN_VERDICT="WARN"; FAN_DETAIL="${FAN_MODE} (expected ${EXPECTED_FAN})"
    WARN_ITEMS+=("Fan|set to ${FAN_MODE}, expected ${EXPECTED_FAN}")
fi
unset FAN_RAW FAN_MODE_RAW _fan_num_for

# IPMI network check: FAIL when no address is configured (empty or 0.0.0.0) or
# when the address does not answer a single ping within 2 seconds.
IPMI_IP=$(ipmicfg -summary 2>/dev/null | grep "IPv4 Address" | cut -d: -f2 | xargs)
if [ -z "$IPMI_IP" ] || [ "$IPMI_IP" = "0.0.0.0" ]; then
    IPMINET_VERDICT="FAIL"; IPMINET_DETAIL="no IPMI IP"
    FAIL_ITEMS+=("IPMI Net|No IP configured")
elif ping -c 1 -W 2 "$IPMI_IP" >/dev/null 2>&1; then
    IPMINET_VERDICT="PASS"; IPMINET_DETAIL="${IPMI_IP} (pinging)"
else
    IPMINET_VERDICT="FAIL"; IPMINET_DETAIL="${IPMI_IP} (not pinging)"
    FAIL_ITEMS+=("IPMI Net|Not pinging: ${IPMI_IP}")
fi

# Private IP ping check (eth1)
# Uses the first IPv4 address on eth1: SKIP when there is none, otherwise
# PASS/FAIL on a single ping with a 2-second wait.
PRIV_IP=$(ip -4 -o addr show eth1 2>/dev/null | awk '{print $4}' | cut -d/ -f1 | head -1)
if [ -z "$PRIV_IP" ]; then
    PRIVNET_VERDICT="SKIP"; PRIVNET_DETAIL="no eth1 IP"
elif ping -c 1 -W 2 "$PRIV_IP" >/dev/null 2>&1; then
    PRIVNET_VERDICT="PASS"; PRIVNET_DETAIL="${PRIV_IP} (pinging)"
else
    PRIVNET_VERDICT="FAIL"; PRIVNET_DETAIL="${PRIV_IP} (not pinging)"
    FAIL_ITEMS+=("Private Net|Not pinging: ${PRIV_IP}")
fi

# One entry per network interface, stored as "iface|speed|up-or-down|color".
# Interfaces are the eth/bond/eno/enp/ens/em names from `ip -o link`, sorted;
# names containing "@" or "." are skipped.
declare -a NIC_LINES=()
while IFS= read -r iface; do
    [ -z "$iface" ] && continue
    echo "$iface" | grep -qE '@|\.' && continue
    # Speed and link state as reported by ethtool; "unknown" when absent.
    speed=$(ethtool "$iface" 2>/dev/null | awk -F: '/Speed:/{gsub(/^[ \t]+/,"",$2); print $2}' | xargs)
    link=$(ethtool  "$iface" 2>/dev/null | awk -F: '/Link detected:/{gsub(/^[ \t]+/,"",$2); print $2}' | xargs)
    [ -z "$speed" ] && speed="unknown"
    [ -z "$link"  ] && link="unknown"
    # Anything other than "yes" (including "unknown") is shown as down.
    if echo "$link" | grep -qi "yes"; then
        link_label="up"; link_color="$G"
    else
        link_label="down"; link_color="$R"
    fi
    NIC_LINES+=("${iface}|${speed}|${link_label}|${link_color}")
done < <(ip -o link show 2>/dev/null | awk -F': ' '{print $2}' | grep -E '^(eth|bond|eno|enp|ens|em)[0-9]' | sort)

# System Event Log: read the entries, dropping blank lines, headers, "empty
# list" notices and the listed power-on / presence-detected messages.
_SEL_RAW=$(ipmicfg -sel list 2>/dev/null | grep -vE "^$|^SEL|No.*entries|List is empty|ACPowerOn|AC Power on|First AC|Battery presence detected|Power Supply presence detected" | grep -v "^[[:space:]]*$")
SEL_JOINED=""
SEL_CRIT_COUNT=0; SEL_WARN_COUNT=0; SEL_INFO_COUNT=0
if [ -n "$_SEL_RAW" ]; then
    # An entry starts on a line beginning with "<number> |"; following lines
    # are appended to it. tac reverses the resulting entry order.
    SEL_JOINED=$(printf '%s\n' "$_SEL_RAW" | awk '
        /^[[:space:]]*[0-9]+[[:space:]]*\|/ { if (buf!="") print buf; buf=$0; next }
        { if (buf!="") buf=buf " " $0 }
        END { if (buf!="") print buf }
    ' | tac)
    # Classify each entry by a "| Critical |" or "| Warning |" column;
    # everything else counts as informational.
    while IFS= read -r _sl; do
        [ -z "$_sl" ] && continue
        if echo "$_sl" | grep -qiE "\|[[:space:]]*Critical[[:space:]]*\|"; then
            SEL_CRIT_COUNT=$(( SEL_CRIT_COUNT + 1 ))
        elif echo "$_sl" | grep -qiE "\|[[:space:]]*Warning[[:space:]]*\|"; then
            SEL_WARN_COUNT=$(( SEL_WARN_COUNT + 1 ))
        else
            SEL_INFO_COUNT=$(( SEL_INFO_COUNT + 1 ))
        fi
    done <<< "$SEL_JOINED"
fi
SEL_TOTAL=$(( SEL_CRIT_COUNT + SEL_WARN_COUNT + SEL_INFO_COUNT ))

# Sensor readings that are not OK: drop blank lines, the header and separator
# rows, rows whose status column is OK, and rows that show N/A twice after an
# empty status column.
SDR_LINES=$(ipmicfg -sdr full 2>/dev/null | grep -vE "^\s*$|^\s*Status\s*\||^\s*-+\s*\|" | grep -vE "^\s*OK\s*\|" | grep -vE "^\s*\|.*N/A.*N/A")

# Parallel arrays describing the drives; collect_drive appends one element to
# each of them per drive, so index i in every array refers to the same drive.
declare -a DRV_DEV DRV_SLOT DRV_DID DRV_DG DRV_SIZE DRV_STATE DRV_ERR DRV_WEAR DRV_MODEL DRV_SERIAL DRV_STATUS DRV_FW
# Number of drives whose status came out as FAIL.
DRIVE_FAIL_COUNT=0

# _smart_raw_count SMART_OUTPUT ATTRIBUTE
# Print the last column of the first line containing ATTRIBUTE as a base-10
# number. A missing or non-numeric value prints 0, so the result is always
# safe to use in shell arithmetic.
_smart_raw_count() {
    local v
    v=$(printf '%s\n' "$1" | awk -v a="$2" 'index($0, a) { print $NF; exit }')
    case "$v" in
        ''|*[!0-9]*) v=0 ;;
    esac
    printf '%s' "$(( 10#$v ))"
}

#######################################
# Probe one drive with smartctl (and nvme-cli for NVMe) and append its record
# to the DRV_* arrays.
# Status rules:
#   SATA/SAS: FAIL when the summed error counters reach
#             DRIVE_ERR_FAIL_THRESHOLD or Media_Wearout_Indicator is at or
#             below DRIVE_WEAR_FAIL_THRESHOLD.
#   NVMe:     FAIL when percentage_used reaches NVME_PCT_USED_FAIL or
#             critical_warning is non-zero; WARN when available_spare is
#             below NVME_SPARE_WARN.
# Globals:
#   DRV_* arrays (appended), DRIVE_FAIL_COUNT, FAILS, FAIL_ITEMS (updated on FAIL)
#   DRIVE_ERR_FAIL_THRESHOLD, DRIVE_WEAR_FAIL_THRESHOLD,
#   NVME_PCT_USED_FAIL, NVME_SPARE_WARN (read)
# Arguments:
#   $1 dev           device path given to smartctl / lsblk / nvme
#   $2 slot          slot label, stored and used as the drive label on FAIL
#   $3 dg            drive group, stored as-is
#   $4 megaraid_did  when non-empty, SMART is read via "-d megaraid,<did>" on /dev/bus/0
#   $5 state_in      drive state, stored as-is
#   $6 storcli_size  optional size string; takes precedence over lsblk/smartctl
#######################################
collect_drive() {
    local dev="$1" slot="$2" dg="$3" megaraid_did="$4" state_in="$5" storcli_size="${6:-}"
    local smart_out model serial size fw_ver model_short
    # Helper variables are kept local so nothing leaks between drives.
    local _sb _sc wear_v="" wear_num="" nvme_reason=""

    if [ -n "$megaraid_did" ]; then
        smart_out=$(smartctl -a -d "megaraid,${megaraid_did}" /dev/bus/0 2>/dev/null)
    else
        smart_out=$(smartctl -a "$dev" 2>/dev/null)
    fi
    model=$(printf '%s' "$smart_out" | grep -E "Device Model|Model Number" | head -1 | cut -d: -f2 | xargs)
    serial=$(printf '%s' "$smart_out" | grep "Serial Number" | head -1 | cut -d: -f2 | xargs)
    fw_ver=$(printf '%s' "$smart_out" | grep -E "^Firmware Version" | head -1 | cut -d: -f2 | xargs)
    [ -z "$model" ]  && model="(unknown)"
    [ -z "$serial" ] && serial="(unknown)"
    [ -z "$fw_ver" ] && fw_ver="-"

    # Size: caller-supplied value, else lsblk bytes in decimal units, else the
    # bracketed part of smartctl's "User Capacity" line, else "?".
    size=""
    if [ -n "$storcli_size" ]; then
        size="$storcli_size"
    elif [ -n "$dev" ] && [ -b "$dev" ]; then
        _sb=$(lsblk -bdno SIZE "$dev" 2>/dev/null | head -1)
        [ -n "$_sb" ] && [ "$_sb" -gt 0 ] 2>/dev/null && size=$(awk -v b="$_sb" 'BEGIN{if (b>=1000000000000) printf "%.2fTB", b/1000000000000; else if (b>=1000000000) printf "%.2fGB", b/1000000000; else if (b>=1000000) printf "%.2fMB", b/1000000; else printf "%dB", b}')
    fi
    if [ -z "$size" ]; then
        _sc=$(printf '%s' "$smart_out" | grep "User Capacity" | head -1 | cut -d'[' -f2 | cut -d']' -f1 | xargs)
        [ -n "$_sc" ] && size="$_sc"
    fi
    [ -z "$size" ] && size="?"

    # Drop a leading vendor word from the model for the compact table.
    model_short=$(echo "$model" | sed -E 's/^(INTEL|SAMSUNG|SEAGATE|TOSHIBA|HITACHI|HGST|WDC|WD|MICRON|KINGSTON|CRUCIAL|SANDISK)[[:space:]]+//I')

    local is_nvme=0
    printf '%s' "$smart_out" | grep -qi "NVM Express\|nvme" && is_nvme=1

    local total_err=0 wear="-" drv_status="PASS"

    if [ "$is_nvme" -eq 1 ] && command -v nvme >/dev/null 2>&1; then
        # smart-log is read from the controller node (/dev/nvme0) when the
        # namespace device (/dev/nvme0n1) is a block device and that node exists.
        local _ctrl="$dev"
        [ -b "$dev" ] && _ctrl=$(echo "$dev" | sed -E 's/n[0-9]+$//')
        [ -e "$_ctrl" ] || _ctrl="$dev"
        local _nvlog
        _nvlog=$(nvme smart-log "$_ctrl" 2>/dev/null)
        local _pct _spare _warn _warn_num
        _pct=$(printf '%s' "$_nvlog" | awk -F: '/percentage_used/{gsub(/[^0-9]/,"",$2);print $2+0}')
        _spare=$(printf '%s' "$_nvlog" | awk -F: '/available_spare[^_]/{gsub(/[^0-9]/,"",$2);print $2+0}')
        _warn=$(printf '%s' "$_nvlog" | awk -F: '/critical_warning/{gsub(/[^0-9a-fx]/,"",$2);print $2}')
        [ -z "$_pct" ]   && _pct="-"
        [ -z "$_spare" ] && _spare="-"
        [ -z "$_warn" ]  && _warn="-"
        wear="${_pct}/${_spare}"
        total_err=0
        if [ "$_pct" != "-" ] && [ "$_pct" -ge "$NVME_PCT_USED_FAIL" ] 2>/dev/null; then
            drv_status="FAIL"
            nvme_reason="${_pct}% used"
        fi
        [ "$_spare" != "-" ] && [ "$_spare" -lt "$NVME_SPARE_WARN" ] 2>/dev/null && [ "$drv_status" = "PASS" ] && drv_status="WARN"
        if [ "$_warn" != "-" ]; then
            # Compare numerically so every spelling of zero (0, 00, 0x0,
            # 0x00) is healthy; an unparseable value still counts as a warning.
            if [[ "$_warn" =~ ^0x[0-9a-f]+$ ]]; then
                _warn_num=$(( 16#${_warn#0x} ))
            elif [[ "$_warn" =~ ^[0-9]+$ ]]; then
                _warn_num=$(( 10#$_warn ))
            else
                _warn_num=1
            fi
            if [ "$_warn_num" -ne 0 ]; then
                drv_status="FAIL"
                nvme_reason="${nvme_reason:+$nvme_reason, }critical warning ${_warn}"
            fi
        fi
    else
        local reallocated pending offline reported udma
        reallocated=$(_smart_raw_count "$smart_out" "Reallocated_Sector_Ct")
        pending=$(_smart_raw_count "$smart_out" "Current_Pending_Sector")
        offline=$(_smart_raw_count "$smart_out" "Offline_Uncorrectable")
        reported=$(_smart_raw_count "$smart_out" "Reported_Uncorrect")
        udma=$(_smart_raw_count "$smart_out" "UDMA_CRC_Error_Count")
        # Wear is the second column after the attribute name on its line.
        wear_v=$(printf '%s' "$smart_out" | grep "Media_Wearout_Indicator" | awk '{for(i=1;i<=NF;i++) if($i=="Media_Wearout_Indicator") {print $(i+2); exit}}')
        total_err=$(( reallocated + pending + offline + reported + udma ))
        wear="${wear_v:--}"
        [ "$total_err" -ge "$DRIVE_ERR_FAIL_THRESHOLD" ] && drv_status="FAIL"
        if [ -n "$wear_v" ] && [ "$wear_v" != "-" ]; then
            # Force base 10 ("097" is not octal); a non-numeric value is
            # treated as 100, i.e. no wear.
            case "$wear_v" in
                *[!0-9]*) wear_num=100 ;;
                *)        wear_num=$(( 10#$wear_v )) ;;
            esac
            [ "$wear_num" -le "$DRIVE_WEAR_FAIL_THRESHOLD" ] && drv_status="FAIL"
        fi
    fi

    [ ${#model_short} -gt 22 ] && model_short="${model_short:0:22}"

    # Build fail reason for this drive
    local fail_reason=""
    if [ "$drv_status" = "FAIL" ]; then
        if [ -n "$wear_num" ] && [ "$wear_num" -le "$DRIVE_WEAR_FAIL_THRESHOLD" ]; then
            fail_reason="wear ${wear_num}% remaining"
        fi
        [ "$total_err" -ge "$DRIVE_ERR_FAIL_THRESHOLD" ] && fail_reason="${fail_reason:+$fail_reason, }${total_err} errors"
        [ -n "$nvme_reason" ] && fail_reason="${fail_reason:+$fail_reason, }${nvme_reason}"
        [ -z "$fail_reason" ] && fail_reason="check drive table"
    fi

    DRV_DEV+=("$dev")
    DRV_SLOT+=("$slot")
    DRV_DID+=("$megaraid_did")
    DRV_DG+=("$dg")
    DRV_SIZE+=("$size")
    DRV_STATE+=("$state_in")
    DRV_ERR+=("$total_err")
    DRV_WEAR+=("$wear")
    DRV_MODEL+=("$model_short")
    DRV_SERIAL+=("$serial")
    DRV_STATUS+=("$drv_status")
    DRV_FW+=("$fw_ver")
    if [ "$drv_status" = "FAIL" ]; then
        DRIVE_FAIL_COUNT=$(( DRIVE_FAIL_COUNT + 1 ))
        FAILS=$(( FAILS + 1 ))
        local drive_label="${slot:-${dev##*/}}"
        local serial_suffix=""
        [ -n "$serial" ] && [ "$serial" != "(unknown)" ] && serial_suffix="  S/N: ${serial}"
        FAIL_ITEMS+=("Drive ${drive_label}|${model_short} -- ${fail_reason}${serial_suffix}")
    fi
}

# -----------------------------------------------------------------------------
# Drive inventory.
#   RAID mode   : one collect_drive call per physical drive listed by storcli,
#                 then a DG -> block-device map, then any NVMe disks.
#   Direct mode : one collect_drive call per lsblk disk.
# -----------------------------------------------------------------------------
if $IS_REAL_RAID; then
    # Physical drives: rows containing an "EID:Slt" token and a known state.
    while IFS='|' read -r eid_slt did state dg pd_size; do
        collect_drive "" "$eid_slt" "$dg" "$did" "$state" "$pd_size"
    done < <(storcli /call/eall/sall show 2>/dev/null | awk '/[0-9]+:[0-9]+/ && /UGood|Onln|Offln|UBad|GHS|DHS|JBOD/ {size=$5$6; printf "%s|%s|%s|%s|%s\n", $1, $2, $3, $4, size}')

    # Build DG -> block device map.
    # storcli presents VDs to the OS as /dev/sdX in the order they appear.
    # Parse the DG number from each "DG/VD" row and match positionally to the
    # sorted list of lsblk disks.
    declare -A DG_TO_BLOCKDEV=()
    _vd_dgs=()
    while IFS= read -r _vd_line; do
        # Rows look like "  0/0 ..."; keep what precedes the first slash.
        _dg="${_vd_line%%/*}"
        _dg="${_dg//[[:space:]]/}"
        [ -n "$_dg" ] && _vd_dgs+=("$_dg")
    done < <(storcli /call/vall show 2>/dev/null | grep -E '^ *[0-9]+/[0-9]+')
    _blk_devs=()
    while IFS= read -r _blkline; do
        [ -n "$_blkline" ] && _blk_devs+=("/dev/$_blkline")
    # NVMe disks are excluded here: they are never behind the controller, and
    # because "nvme*" sorts before "sd*" they would otherwise take the first
    # positions and be paired with VD 0, VD 1, ...
    done < <(lsblk -dno NAME,TYPE 2>/dev/null | awk '$2=="disk" && $1 !~ /^nvme/ {print $1}' | sort)
    for _vi in "${!_vd_dgs[@]}"; do
        [ -n "${_blk_devs[$_vi]}" ] && DG_TO_BLOCKDEV["${_vd_dgs[$_vi]}"]="${_blk_devs[$_vi]}"
    done
    unset _vd_dgs _blk_devs _vi _vd_line _blkline _dg

    # Also collect NVMe drives -- never behind RAID controller, always direct
    while IFS= read -r dev; do
        [ -b "$dev" ] || continue
        collect_drive "$dev" "$(basename "$dev")" "-" "" "NVMe"
    done < <(lsblk -dno NAME,TYPE 2>/dev/null | awk '$2=="disk" && $1~/^nvme/{print "/dev/"$1}' | sort)
else
    # No RAID controller: every lsblk disk is inspected directly.
    while IFS= read -r dev; do
        [ -b "$dev" ] || continue
        collect_drive "$dev" "$(basename "$dev")" "-" "" "Direct"
    done < <(lsblk -dno NAME,TYPE 2>/dev/null | awk '$2=="disk"{print "/dev/"$1}' | sort)
fi

# Carry a non-PASS RAM verdict into the matching reasons list.
case "$RAM_VERDICT" in
    FAIL) FAIL_ITEMS+=("RAM|${RAM_FAIL_REASON}") ;;
    WARN) WARN_ITEMS+=("RAM|${RAM_FAIL_REASON}") ;;
esac

# Empty-disk check: a disk with no child devices and no filesystem signature
# is reported as a WARN item. NVMe disks are exempt from this warning.
declare -a _EMPTY_DISKS=()
_DISKS_RAW=$(lsblk -rno NAME,TYPE,FSTYPE -e 7,1,2 2>/dev/null)
if [[ -n "$_DISKS_RAW" ]]; then
    while IFS=' ' read -r _diskname _disktype _diskfs; do
        [[ -n "$_diskname" ]] || continue
        # Count everything lsblk lists under the disk other than the disk itself.
        _child_count=$(lsblk -lno NAME "/dev/$_diskname" 2>/dev/null | grep -vc "^${_diskname}$")
        if [ "$_child_count" -eq 0 ] && [[ -z "$_diskfs" && "$_diskname" != nvme* ]]; then
            _EMPTY_DISKS+=("$_diskname")
        fi
    done < <(awk '$2 == "disk" { print $1, $2, $3 }' <<< "$_DISKS_RAW")
fi
unset _DISKS_RAW _diskname _disktype _diskfs _child_count
_EMPTY_DISK_COUNT=${#_EMPTY_DISKS[@]}

if (( _EMPTY_DISK_COUNT > 0 )); then
    EMPTY_DISKS_VERDICT="WARN"
    # Comma-join the names.
    printf -v _EMPTY_LIST '%s,' "${_EMPTY_DISKS[@]}"
    _EMPTY_LIST="${_EMPTY_LIST%,}"
    # Keep the list short enough for the box; mark the cut with an ellipsis.
    _MAX_LIST=$(( W - 50 ))
    if (( ${#_EMPTY_LIST} > _MAX_LIST )); then
        _EMPTY_LIST="${_EMPTY_LIST:0:$(( _MAX_LIST - 3 ))}..."
    fi
    EMPTY_DISKS_DETAIL="${_EMPTY_DISK_COUNT} drive(s) with no partitions: ${_EMPTY_LIST}"
    WARN_ITEMS+=("Empty Drv|${EMPTY_DISKS_DETAIL}")
fi

# Rendering starts here: stderr is pointed at stdout for the rest of the run.
exec 2>&1

box_open "SERVER SUMMARY -- ${HOSTNAME}" "$C"

# Identity rows: address, MAC (only when known), OS, run timestamp.
bline "$(printf "  %b%-10s%b  %b%s%b" "$BOLD" "IP:" "$RST" "$C" "$IP_DISPLAY" "$RST")"
if [ -n "$ETH0_MAC" ]; then
    bline "$(printf "  %b%-10s%b  %b%s%b" "$BOLD" "${MAC_IFACE^^} MAC:" "$RST" "$DIM" "$ETH0_MAC" "$RST")"
fi
bline "$(printf "  %b%-10s%b  %b%s%b" "$BOLD" "OS:" "$RST" "$M" "${OS_NAME:-unknown}" "$RST")"
bline "$(printf "  %b%-10s%b  %b%s%b" "$BOLD" "Run:" "$RST" "$DIM" "$RUN_TIME" "$RST")"
bdiv

bdiv
# Hardware rows: board, CPU, timezone.
bline "$(printf "  %b%-10s%b %b%s %s%b" "$BOLD" "Board" "$RST" "$DIM" "$BOARD_MFG" "$BOARD_RAW" "$RST")"
bline "$(printf "  %b%-10s%b %b%s%b" "$BOLD" "CPU" "$RST" "$DIM" "$CPU_DETAIL" "$RST")"
# Fields 3-5 of timedatectl's "Time zone" line, whitespace-normalised by xargs;
# the datacenter name is appended when it is set and not "Unknown".
_tz_vis=$(timedatectl 2>/dev/null | awk '/Time zone/ {print $3, $4, $5}' | xargs)
if [ -n "$_DC_NAME" ] && [ "$_DC_NAME" != "Unknown" ]; then
    _tz_vis="${_tz_vis}  ($_DC_NAME)"
fi
bline "$(printf "  %b%-10s%b %b%s%b" "$BOLD" "Timezone" "$RST" "$DIM" "$_tz_vis" "$RST")"

# Convert one DIMM size string to whole gigabytes, printed on stdout.
#   $1  size text such as "16 GB", "16GB" or "16384 MB"
# Sizes given in MB are divided by 1024 (anything under 1 GB becomes 0);
# text with no GB/MB unit yields 0.
_ram_size_to_gb() {
    local sz="${1//[[:space:]]/}" num=0
    [[ "$sz" =~ ([0-9]+) ]] && num=$(( 10#${BASH_REMATCH[1]} ))
    case "${sz^^}" in
        *GB*) printf '%d' "$num" ;;
        *MB*) printf '%d' $(( num / 1024 )) ;;
        *)    printf '0' ;;
    esac
}

# Sum installed memory over "count|size|..." rows, printed as whole GB.
#   $1  newline-separated rows; rows whose count is not a plain integer
#       (including blank rows) are ignored.
_ram_sum_groups() {
    local total=0 count size _rest
    while IFS='|' read -r count size _rest; do
        count="${count//[[:space:]]/}"
        [[ "$count" =~ ^[0-9]+$ ]] || continue
        total=$(( total + 10#$count * $(_ram_size_to_gb "$size") ))
    done <<< "$1"
    printf '%d' "$total"
}

# Format a GB total for display: "<n>GB" below 1024, otherwise TB with one
# decimal place that is dropped when it is zero (1536 -> "1.5TB", 2048 -> "2TB").
#   $1  total in whole GB
_ram_fmt_total() {
    local gb=$1 tenths
    if (( gb < 1024 )); then
        printf '%dGB' "$gb"
        return
    fi
    # Tenths of a TB, rounded to nearest; integer maths avoids locale-dependent
    # decimal separators.
    tenths=$(( (gb * 10 + 512) / 1024 ))
    if (( tenths % 10 == 0 )); then
        printf '%dTB' $(( tenths / 10 ))
    else
        printf '%d.%dTB' $(( tenths / 10 )) $(( tenths % 10 ))
    fi
}

RAM_TOTAL_GB=$(_ram_sum_groups "$DIMM_GROUPS")
RAM_TOTAL_STR=$(_ram_fmt_total "$RAM_TOTAL_GB")
# stderr is shown on the card at this point, so an empty DIMM_UNIQUE must not
# reach the integer test unguarded.
if [ "${DIMM_UNIQUE:-0}" -gt 1 ] 2>/dev/null; then
    RAM_MATCH_TAG="(MISMATCH)"
else
    RAM_MATCH_TAG="(MATCHING)"
fi

# RAM rows: the first display line carries the total, match tag, optional type
# tag and the verdict badge; every further line is a continuation row.
first_ram=true
for line in "${RAM_DISPLAY_LINES[@]}"; do
    if ! $first_ram; then
        cont_row "${line}"
        continue
    fi
    type_suffix="${RAM_TYPE_TAG:+  ${RAM_TYPE_TAG}}"
    verdict_row "RAM" "${RAM_TOTAL_STR}  ${RAM_MATCH_TAG}  ${line}${type_suffix}" "$RAM_VERDICT"
    first_ram=false
done

# Firmware and controller rows.
verdict_row "BIOS" "$BIOS_DETAIL" "$BIOS_VERDICT"
verdict_row "IPMI FW" "$IPMI_DETAIL" "$IPMI_VERDICT"
verdict_row "RAID" "$RAID_DETAIL" "$RAID_VERDICT"

# BBU/CacheVault row, RAID systems only. Any verdict other than FAIL/WARN is
# shown as PASS.
# NOTE(review): FAILS/WARNS are bumped by hand here, unlike the other rows in
# this section -- confirm verdict_row does not already count them.
if $IS_REAL_RAID; then
    case "$BBU_CV_VERDICT" in
        FAIL)
            verdict_row "BBU/CV" "$BBU_CV_DETAIL" "FAIL"
            FAILS=$(( FAILS + 1 ))
            ;;
        WARN)
            verdict_row "BBU/CV" "$BBU_CV_DETAIL" "WARN"
            WARNS=$(( WARNS + 1 ))
            ;;
        *)
            verdict_row "BBU/CV" "$BBU_CV_DETAIL" "PASS"
            ;;
    esac
fi

# Cooling and network rows; the private-network row is hidden when skipped.
verdict_row "Fan" "$FAN_DETAIL" "$FAN_VERDICT"
verdict_row "IPMI Net" "$IPMINET_DETAIL" "$IPMINET_VERDICT"
if [ "$PRIVNET_VERDICT" != "SKIP" ]; then
    verdict_row "Private Net" "$PRIVNET_DETAIL" "$PRIVNET_VERDICT"
fi

# One row per NIC entry ("name|speed|link|colour"). The entry's colour field
# equal to $G is what marks the link as up; anything else is a failure.
for nic_entry in "${NIC_LINES[@]}"; do
    IFS='|' read -r _nic _speed _link _lc <<< "$nic_entry"
    if [[ "$_lc" == "$G" ]]; then
        verdict_row "$_nic" "$_speed  up" "PASS"
        continue
    fi
    verdict_row "$_nic" "$_speed  down" "FAIL"
    FAIL_ITEMS+=("${_nic}|link is down")
done
unset _nic _speed _link _lc

# Virtual-drive table (RAID systems only): print storcli's "DG/VD" header line
# and every VD row. The controller selected by CTRL_IDX is asked first; if that
# yields no rows, all controllers are queried instead.
# The output is held in a variable rather than a temp file, so nothing is left
# on disk if the script is interrupted.
if $IS_REAL_RAID; then
    _VD_OUT=$(storcli "/c${CTRL_IDX}/vall" show 2>/dev/null)
    _VD_ROWS=$(grep -E '^ *[0-9][0-9]*/[0-9]' <<< "$_VD_OUT")
    if [ -z "$_VD_ROWS" ]; then
        _VD_OUT=$(storcli /call/vall show 2>/dev/null)
        _VD_ROWS=$(grep -E '^ *[0-9][0-9]*/[0-9]' <<< "$_VD_OUT")
    fi
    if [ -n "$_VD_ROWS" ]; then
        # Header comes from whichever query produced the rows.
        _VD_HDR=$(grep -m1 'DG/VD' <<< "$_VD_OUT")
        bdiv
        bdiv
        bline "$(printf "  %b%s%b" "$BOLD" "$_VD_HDR" "$RST")"
        bdiv
        while IFS= read -r _vd_line; do
            [ -n "$_vd_line" ] && bline "$(printf "  %b%s%b" "$C" "$_vd_line" "$RST")"
        done <<< "$_VD_ROWS"
        bdiv
    fi
    unset _VD_OUT _VD_HDR _VD_ROWS _vd_line
fi

# Drive table header.
bdiv
bline "$(printf "  %b%-15s %-10s %-8s %-22s %-10s %-5s %-5s%s%b" "$BOLD" "Device" "Size" "Type" "Model" "Firmware" "Err" "Wear" "Status" "$RST")"
bdiv

# Split drive indices into PASS and everything else (FAIL and WARN alike), so
# that healthy drives can be listed first.
pass_idx=()
fail_idx=()
for i in "${!DRV_STATUS[@]}"; do
    if [ "${DRV_STATUS[$i]}" = "PASS" ]; then
        pass_idx+=("$i")
    else
        fail_idx+=("$i")
    fi
done

#######################################
# Print one drive-table row followed by the partition tree of the block device
# that backs the drive.
# Globals (read): DRV_DEV DRV_STATUS DRV_SIZE DRV_MODEL DRV_ERR DRV_WEAR DRV_FW
#                 DRV_SLOT DRV_DG DG_TO_BLOCKDEV W and the colour variables
# Arguments:      $1 - index into the DRV_* arrays
# Outputs:        rows emitted through bline
#######################################
print_drive_with_parts() {
    # `local i=$1 dev="${DRV_DEV[$i]}"` in a single statement would expand $i
    # before the local exists and silently read the caller's i, so the index is
    # declared on its own line before anything that uses it.
    local i="$1"
    local dev="${DRV_DEV[$i]}" status="${DRV_STATUS[$i]}"
    local size="${DRV_SIZE[$i]}" model="${DRV_MODEL[$i]}"
    local err="${DRV_ERR[$i]}" wear_raw="${DRV_WEAR[$i]}" fw="${DRV_FW[$i]:-}"
    # Format wear for display: strip leading zeros, add % suffix for numeric values
    local wear="$wear_raw"
    if [[ "$wear_raw" =~ ^[0-9]+$ ]]; then
        wear="$(( 10#$wear_raw ))%"
    fi
    # Label is the device basename, or the slot when there is no device path.
    local slot="${DRV_SLOT[$i]}"
    local label="${dev##*/}"
    [ -z "$label" ] && label="$slot"

    # Colour-code the status badge
    local st_color="$G"
    [ "$status" = "FAIL" ] && st_color="$R"
    [ "$status" = "WARN" ] && st_color="$Y"

    # Dynamic right-pad so status badge lines up at column W
    local left_part
    left_part=$(printf "  %-15s %-10s %-8s %-22s %-10s %-5s %-5s" \
        "$label" "$size" "disk" "$model" "${fw:--}" "$err" "$wear")
    local left_len=${#left_part}
    local status_pad=$(( W - left_len - 7 ))
    [ "$status_pad" -lt 0 ] && status_pad=0

    bline "$(printf "%b%s%${status_pad}s  %b%s%b " \
        "$BOLD" "$left_part" "" "$st_color" "$status" "$RST")"

    # Determine which block device to inspect for partitions.
    # Direct-attached: use $dev. RAID: look up the VD block device by DG number.
    local part_dev="$dev"
    if [ -z "$part_dev" ] || ! [ -b "$part_dev" ]; then
        local dg="${DRV_DG[$i]}"
        [ -n "$dg" ] && part_dev="${DG_TO_BLOCKDEV[$dg]:-}"
    fi

    # Print partition tree (silently skip if no partitions or device not found)
    if [ -n "$part_dev" ] && [ -b "$part_dev" ]; then
        local parts part_name part_size part_type part_fs tree_char
        # Drop the disk's own row, VG_XenStorage device-mapper rows and blanks.
        parts=$(lsblk -lno NAME,SIZE,TYPE,FSTYPE "$part_dev" 2>/dev/null \
            | grep -v "^${part_dev##*/}[[:space:]]" \
            | grep -v "^VG_XenStorage" \
            | awk '{gsub(/^[[:space:]]+|[[:space:]]+$/,""); print}' \
            | grep -v "^$")
        if [ -n "$parts" ]; then
            local part_count part_num=0
            part_count=$(echo "$parts" | wc -l)
            while read -r part_name part_size part_type part_fs; do
                [ -z "$part_name" ] && continue
                part_num=$(( part_num + 1 ))
                # The last entry gets the closing branch glyph.
                if [ "$part_num" -eq "$part_count" ]; then
                    tree_char="\\_"
                else
                    tree_char="|_"
                fi
                [ "$part_fs" = "-" ] && part_fs=""
                bline "$(printf "  %b  %s%-12s %-8s %-8s%s%b" \
                    "$DIM" "$tree_char" "$part_name" "$part_size" "$part_type" "$part_fs" "$RST")"
            done <<< "$parts"

            # Show LVM VG summary with LV counts if VGs exist
            # NOTE(review): vgdisplay is run without arguments, so this lists
            # every VG it reports, not only those on $part_dev -- confirm that
            # repeating the list under each drive is intended.
            if command -v vgdisplay >/dev/null 2>&1; then
                vgdisplay 2>/dev/null | awk '
                    /^  VG Name/ {vg=$NF}
                    /^  Cur LV/ {lv=$NF; if(vg) print vg " with " lv " LV" (lv>1?"s":"")}
                ' | while IFS= read -r vg_line; do
                    [ -n "$vg_line" ] && bline "$(printf "    %b%s%b" "$DIM" "$vg_line" "$RST")"
                done
            fi
        fi
    fi
}

# Passing drives first; the remaining drives follow under their own divider.
for i in "${pass_idx[@]}"; do
    print_drive_with_parts "$i"
done
if (( ${#fail_idx[@]} > 0 )); then
    bdiv
    for i in "${fail_idx[@]}"; do
        print_drive_with_parts "$i"
    done
fi

# System event log: a summary row, then up to ten log lines coloured by the
# severity column found in each line.
bdiv
if [ "$SEL_TOTAL" -eq 0 ]; then
    verdict_row "SEL" "no events" "PASS"
else
    # "<total> event(s)" followed by each non-zero severity count.
    _sel_summary="${SEL_TOTAL} event(s)"
    if [ "$SEL_CRIT_COUNT" -gt 0 ]; then
        _sel_summary="${_sel_summary}  ${SEL_CRIT_COUNT} critical"
    fi
    if [ "$SEL_WARN_COUNT" -gt 0 ]; then
        _sel_summary="${_sel_summary}  ${SEL_WARN_COUNT} warning"
    fi
    if [ "$SEL_INFO_COUNT" -gt 0 ]; then
        _sel_summary="${_sel_summary}  ${SEL_INFO_COUNT} info"
    fi

    # Critical events fail the check; any other logged event is a warning.
    if [ "$SEL_CRIT_COUNT" -gt 0 ]; then
        verdict_row "SEL" "$_sel_summary" "FAIL"
        FAIL_ITEMS+=("SEL|${SEL_CRIT_COUNT} critical event(s) in system event log")
    else
        verdict_row "SEL" "$_sel_summary" "WARN"
        WARN_ITEMS+=("SEL|${SEL_TOTAL} event(s) in system event log")
    fi

    head -10 <<< "$SEL_JOINED" | while IFS= read -r _sl; do
        [ -n "$_sl" ] || continue
        # Measure the line without escape sequences; an over-long line is
        # replaced by its stripped, truncated form.
        _sv=$(printf '%s' "$_sl" | sed -E 's/\x1b\[[0-9;:]*[mGKHFJA-Za-z]//g')
        if (( ${#_sv} > W - 4 )); then
            _sl="${_sv:0:$(( W - 7 ))}..."
        fi
        _sel_colour="$DIM"
        if printf '%s\n' "$_sl" | grep -qiE "\|[[:space:]]*Critical[[:space:]]*\|"; then
            _sel_colour="$R"
        elif printf '%s\n' "$_sl" | grep -qiE "\|[[:space:]]*Warning[[:space:]]*\|"; then
            _sel_colour="$Y"
        fi
        bline "$(printf "  %b%s%b" "$_sel_colour" "$_sl" "$RST")"
    done
fi

# Sensor data: PASS when no alert lines were collected; otherwise FAIL, with a
# compact name list for the reasons table and up to ten alert lines in red.
if [ -z "$SDR_LINES" ]; then
    verdict_row "SDR" "all sensors OK" "PASS"
else
    verdict_row "SDR" "sensor alerts detected" "FAIL"

    # Names come from the second '|' field of the first five lines, with any
    # "(n)" marker removed, joined by "; ".
    _sdr_names=$(
        head -5 <<< "$SDR_LINES" | while IFS= read -r _sl; do
            _name=$(printf '%s' "$_sl" | awk -F'|' '{gsub(/^[ \t]+|[ \t]+$/,"",$2); gsub(/\([0-9]+\)[[:space:]]*/,"",$2); print $2}' | xargs)
            if [ -n "$_name" ]; then
                printf '%s; ' "$_name"
            fi
        done | sed 's/; $//'
    )
    if [ "$(printf '%s\n' "$SDR_LINES" | wc -l)" -gt 5 ]; then
        _sdr_names="${_sdr_names} (+more)"
    fi
    FAIL_ITEMS+=("SDR|${_sdr_names}")

    head -10 <<< "$SDR_LINES" | while IFS= read -r sdr_line; do
        # Width is measured without escape sequences; an over-long line is
        # replaced by its stripped, truncated form.
        _sv=$(printf '%s' "$sdr_line" | sed -E 's/\x1b\[[0-9;:]*[mGKHFJA-Za-z]//g')
        if (( ${#_sv} > W - 4 )); then
            sdr_line="${_sv:0:$(( W - 7 ))}..."
        fi
        bline "$(printf "  %b%s%b" "$R" "$sdr_line" "$RST")"
    done
fi

# Empty-drive row appears only when the earlier check raised a warning.
case "${EMPTY_DISKS_VERDICT:-}" in
    WARN) verdict_row "Empty Drv" "$EMPTY_DISKS_DETAIL" "WARN" ;;
esac

# Overall result: any failure -> exit code 2, else any warning -> 1, else 0.
bdiv
if [ "$FAILS" -gt 0 ]; then
    EXIT_CODE=2
    od="${FAILS} fail, ${WARNS} warn, ${PASSES} ok"
    ob=$(badge_fail)
elif [ "$WARNS" -gt 0 ]; then
    EXIT_CODE=1
    od="${WARNS} warn, ${PASSES} ok"
    ob=$(badge_warn)
else
    EXIT_CODE=0
    od="${PASSES} checks passed"
    ob="${BGG}${BLK}${BOLD} PASS ${RST}"
fi
bline "$(printf "  %b%-10s%b %b%s%b  %b" "$BOLD" "OVERALL" "$RST" "$DIM" "$od" "$RST" "$ob")"

# Render one row of the Status / Item / Reason table through bline.
#   $1  colour escape for the status word
#   $2  status word ("FAIL" or "WARN")
#   $3  entry in "item|reason" form
# The entry is split at its FIRST '|', so a reason that itself contains '|'
# is kept whole. Reasons longer than W-26 characters are cut and end in "...".
_issue_row() {
    local color="$1" label="$2" entry="$3"
    local item="${entry%%|*}"
    local reason="${entry#*|}"
    local max_reason=$(( W - 26 ))
    if [ "${#reason}" -gt "$max_reason" ]; then
        reason="${reason:0:$(( max_reason - 3 ))}..."
    fi
    bline "$(printf "  %b%-6s%b  %b%-12s%b  %b%s%b" "$color" "$label" "$RST" "$BOLD" "$item" "$RST" "$DIM" "$reason" "$RST")"
}

# Reasons table: shown only when something failed or warned; failures first.
if [ "${#FAIL_ITEMS[@]}" -gt 0 ] || [ "${#WARN_ITEMS[@]}" -gt 0 ]; then
    bdiv
    bline "$(printf "  %b%-6s  %-12s  %s%b" "$BOLD" "Status" "Item" "Reason" "$RST")"
    bdiv
    for entry in "${FAIL_ITEMS[@]}"; do
        _issue_row "$R" "FAIL" "$entry"
    done
    for entry in "${WARN_ITEMS[@]}"; do
        _issue_row "$Y" "WARN" "$entry"
    done
fi

# Close the box opened by box_open above, passing the same colour ($C).
box_close "$C"


# Exit status is EXIT_CODE as set by the OVERALL row (2 = failures,
# 1 = warnings only, 0 = clean); defaults to 0 if it was never set.
exit "${EXIT_CODE:-0}"