#!/usr/bin/env bash
#
# Generate a concise system health report for Linux servers.
#
# This script is useful for incident triage, routine maintenance, and
# collecting baseline information before deployments or infrastructure changes.
#
# Usage:
#   bash system_health_report.sh
#   bash system_health_report.sh --output /tmp/health-report.txt
#   bash system_health_report.sh --service nginx --service docker
#
# Options:
#   --output FILE    Write the report to FILE instead of stdout.
#   --service NAME   Include systemd status for a service. Can be repeated.
#   --help           Show this help message.

set -Eeuo pipefail

# Populated by parse_args; read by main and collect_report.
output_file=""
services=()

#######################################
# Print the help text.
# The text is embedded instead of being sliced out of "$0" by line number, so
# it cannot drift into the code below the header and still works when the
# script is fed to bash on stdin (where "$0" is not this file).
# Outputs: help text on stdout.
#######################################
usage() {
  cat <<'EOF'
Generate a concise system health report for Linux servers.

This script is useful for incident triage, routine maintenance, and
collecting baseline information before deployments or infrastructure changes.

Usage:
  bash system_health_report.sh
  bash system_health_report.sh --output /tmp/health-report.txt
  bash system_health_report.sh --service nginx --service docker

Options:
  --output FILE    Write the report to FILE instead of stdout.
  --service NAME   Include systemd status for a service. Can be repeated.
  --help           Show this help message.
EOF
}

#######################################
# Abort the script when a mandatory external command is missing.
# Arguments: $1 - command name to look up with `command -v`.
# Outputs:   error message on stderr when the command is not found.
# Returns:   0 when found; otherwise exits the script with status 1.
#######################################
require_command() {
  local command_name="$1"
  if ! command -v "$command_name" >/dev/null 2>&1; then
    printf 'ERROR: required command not found: %s\n' "$command_name" >&2
    exit 1
  fi
}

#######################################
# Print a report section heading.
# Arguments: $1 - section title.
# Outputs:   a blank line followed by "## <title>" on stdout.
#######################################
section() {
  printf '\n## %s\n' "$1"
}

#######################################
# Run a command whose failure must not abort the rest of the report.
# A non-zero status is recorded in the report itself so the reader can see
# that the section is incomplete.
# Arguments: $@ - command and its arguments.
# Outputs:   the command's output, plus a one-line note on failure.
# Returns:   always 0.
#######################################
run_best_effort() {
  local status=0
  "$@" || status=$?
  if ((status != 0)); then
    printf '(%s exited with status %d)\n' "$1" "$status"
  fi
  return 0
}

#######################################
# Write the full health report to stdout.
# Globals: services (read) - systemd units to include, may be empty.
# Outputs: the report on stdout; tool diagnostics may appear on stderr.
#######################################
collect_report() {
  local service

  section "Host"
  printf 'Hostname: %s\n' "$(hostname -f 2>/dev/null || hostname)"
  printf 'Kernel: %s\n' "$(uname -srmo)"
  printf 'Uptime: %s\n' "$(uptime -p 2>/dev/null || uptime)"
  printf 'Date: %s\n' "$(date -Is)"

  section "CPU"
  if command -v lscpu >/dev/null 2>&1; then
    # LC_ALL=C keeps the field labels in English so the filter matches.
    # The label prefix is stripped from the whole line (instead of printing
    # $2) so values that themselves contain ':' are not truncated.
    LC_ALL=C lscpu \
      | awk -F: '/Model name|CPU\(s\)|Thread|Core|Socket/ {
          label = $1
          sub(/^[^:]*:[ \t]*/, "")
          print label ": " $0
        }' \
      || printf '(lscpu exited with a non-zero status)\n'
  else
    grep -m1 'model name' /proc/cpuinfo || true
  fi
  printf 'Load average: %s\n' "$(awk '{print $1, $2, $3}' /proc/loadavg)"

  section "Memory"
  run_best_effort free -h

  section "Disk"
  # df exits non-zero if any single mount cannot be statted; keep going.
  run_best_effort df -hT -x tmpfs -x devtmpfs

  section "Network"
  ip -brief address 2>/dev/null || hostname -I || true

  section "Top Processes By Memory"
  # awk consumes all of ps's output. With `head`, ps can be killed by SIGPIPE
  # on busy hosts, and pipefail + errexit would then abort the report.
  ps -eo pid,ppid,comm,%mem,%cpu --sort=-%mem \
    | awk 'NR <= 11' \
    || printf '(ps exited with a non-zero status)\n'

  if [[ "${#services[@]}" -gt 0 ]]; then
    section "Service Status"
    if ! command -v systemctl >/dev/null 2>&1; then
      printf 'systemctl not found; skipping service status.\n'
    else
      for service in "${services[@]}"; do
        printf '\nService: %s\n' "$service"
        # Both calls return non-zero for inactive/unknown units; that is
        # report content, not a script error.
        systemctl is-active "$service" 2>/dev/null || true
        systemctl --no-pager --lines=5 status "$service" 2>/dev/null || true
      done
    fi
  fi
}

#######################################
# Parse command-line options into the global settings.
# Globals:   output_file (written), services (appended).
# Arguments: $@ - the script's command-line arguments.
# Outputs:   help text on stdout for --help; errors (and help) on stderr.
# Returns:   0 on success; exits 0 after --help, exits 1 on invalid input.
#######################################
parse_args() {
  while [[ "$#" -gt 0 ]]; do
    case "$1" in
      --output)
        output_file="${2:-}"
        if [[ -z "$output_file" ]]; then
          printf 'ERROR: --output requires a file path.\n' >&2
          exit 1
        fi
        shift 2
        ;;
      --service)
        if [[ -z "${2:-}" ]]; then
          printf 'ERROR: --service requires a service name.\n' >&2
          exit 1
        fi
        services+=("$2")
        shift 2
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        printf 'ERROR: unknown option: %s\n' "$1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Entry point: parse options, verify mandatory tools, emit the report.
# Arguments: $@ - the script's command-line arguments.
#######################################
main() {
  parse_args "$@"

  require_command free
  require_command df
  require_command ps

  if [[ -n "$output_file" ]]; then
    collect_report >"$output_file"
    printf 'Health report written to %s\n' "$output_file"
  else
    collect_report
  fi
}

main "$@"