#!/bin/bash
#
# cwp-ssl-expiry — SSL certificate expiry monitor
#
# Part of CloudWatch Pro (CWP) v1. Runs once a day on each cPanel server.
# Walks every installed SSL cert in /var/cpanel/ssl/installed/certs/, parses
# expiry date + covered domains via openssl x509, flags certs expiring soon.
# Emits findings + sends a single digest email if anything is in danger zone.
#
# Detect-only. Does NOT renew certs, trigger AutoSSL, or take any other action.
# Operator's job: investigate why AutoSSL didn't auto-renew, or renew manually.
#
# THRESHOLDS:
#   Already expired                        → P1
#   Expires in <= EXPIRE_CRIT_DAYS  (3)    → P1
#   Expires in <= EXPIRE_WARN_DAYS  (14)   → P2
#
# DUPLICATE BEHAVIOUR:
#   This module re-emits findings DAILY for any cert still in danger zone.
#   No cooldown. SSL needs repeat reminders until fixed (you may be on PTO
#   when the first alert fires; we want you to see it on day 2 also).
#   Dedup happens at the dashboard layer (group by cert subject).
#
# INSTALL:
#   sudo install -d /opt/cwp/agent/modules/ssl-expiry \
#                   /etc/cwp \
#                   /var/cwp/state/ssl-expiry \
#                   /var/cwp/findings \
#                   /var/log/cwp
#   sudo install -m 0755 cwp-ssl-expiry /opt/cwp/agent/modules/ssl-expiry/
#   sudo install -m 0644 config.example.conf /etc/cwp/ssl-expiry.conf
#   sudo $EDITOR /etc/cwp/ssl-expiry.conf
#   sudo crontab -l 2>/dev/null | { cat; cat cron.example; } | sudo crontab -
#
# USAGE:
#   cwp-ssl-expiry                    # normal run
#   cwp-ssl-expiry --dry-run          # report only, no findings/email
#   cwp-ssl-expiry --verbose          # log to stderr
#   cwp-ssl-expiry --no-email         # write findings, skip email
#   cwp-ssl-expiry --cert-dir <path>  # override cert dir (testing)
#   cwp-ssl-expiry --version

# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ---- identity ----
SCRIPT_NAME="cwp-ssl-expiry"
VERSION="0.1.0"
SERVER_NAME="$(hostname -f 2>/dev/null || hostname)"

# ---- files and directories (the config file may override these) ----
CONFIG_FILE="${CWP_SSL_EXPIRY_CONFIG:-/etc/cwp/ssl-expiry.conf}"
CERT_DIR="/var/cpanel/ssl/installed/certs"
STATE_DIR="/var/cwp/state/ssl-expiry"
FINDINGS_DIR="/var/cwp/findings"
LOG_FILE="/var/log/cwp/ssl-expiry.log"

# ---- alert delivery ----
ALERT_EMAIL="root@localhost"
SENDMAIL_BIN="/usr/sbin/sendmail"

# ---- thresholds, in whole days until expiry ----
EXPIRE_CRIT_DAYS=3
EXPIRE_WARN_DAYS=14

# ---- command-line switches (set by the argument parser) ----
DRY_RUN=0 VERBOSE=0 NO_EMAIL=0
CERT_DIR_OVERRIDE=""

# ---- helpers ----
# log <level> <message...>
# Appends "<timestamp> [<level>] <message>" to $LOG_FILE (best effort) and
# mirrors the same line to stderr when VERBOSE is 1 or the level is ERROR.
# Always succeeds so that logging can never abort the run under `set -e`.
log() {
  local level="$1"; shift
  local ts line
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf -v line '%s [%s] %s' "$ts" "$level" "$*"
  # The append is wrapped in a group so that 2>/dev/null also silences the
  # shell's own "No such file or directory" complaint when the log directory
  # does not exist yet. Redirections are applied left to right, so a trailing
  # 2>/dev/null on the printf itself would come too late to hide it.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
  if [[ "$VERBOSE" -eq 1 || "$level" == "ERROR" ]]; then
    printf '%s\n' "$line" >&2
  fi
  return 0
}
# die <message...>
# Records the message at ERROR level through log, then terminates the script
# with exit status 1.
die() {
  log "ERROR" "$*"
  exit 1
}
# usage [exit_code]
# Prints the script's leading comment header (with the "# " prefix removed)
# to stdout and exits with the given status (default 0).
# The header is found structurally: the shebang line is skipped and output
# stops at the first line that is not a comment. A fixed line range would
# echo the shebang and drop header lines whenever the header length changes.
usage() {
  awk 'NR == 1 && /^#!/ { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0" || true
  exit "${1:-0}"
}

# load_config
# Sources /etc/cwp/common.conf (if readable) and then $CONFIG_FILE (if
# readable), so the module config wins over the shared one. A non-empty
# CERT_DIR_OVERRIDE is applied last and therefore beats both files.
# Always returns 0.
load_config() {
  # Shared CWP defaults (ALERT_EMAIL, SERVER_NAME, etc.) come first.
  if [[ -r /etc/cwp/common.conf ]]; then
    # shellcheck source=/dev/null
    source /etc/cwp/common.conf
  fi

  if [[ ! -r "$CONFIG_FILE" ]]; then
    log "WARN" "no config at $CONFIG_FILE — using built-in defaults"
  else
    # shellcheck source=/dev/null
    source "$CONFIG_FILE"
    log "INFO" "loaded config from $CONFIG_FILE"
  fi

  # The command-line override is applied after the files were sourced.
  if [[ -n "$CERT_DIR_OVERRIDE" ]]; then
    CERT_DIR="$CERT_DIR_OVERRIDE"
  fi

  return 0
}

# ensure_dirs
# Creates $STATE_DIR, $FINDINGS_DIR and the directory holding $LOG_FILE when
# they are missing. Calls die (exit 1) if any of them cannot be created.
ensure_dirs() {
  # Keep the loop variable local so a global named "d" is never clobbered.
  local d
  for d in "$STATE_DIR" "$FINDINGS_DIR" "$(dirname -- "$LOG_FILE")"; do
    [[ -d "$d" ]] && continue
    mkdir -p -- "$d" 2>/dev/null || die "cannot create $d"
  done
}

# preflight
# Verifies the runtime prerequisites: dies when openssl is not on PATH, and
# logs a warning when email is wanted but $SENDMAIL_BIN is not executable.
preflight() {
  if ! command -v openssl >/dev/null; then
    die "openssl not found"
  fi

  # With --no-email there is nothing further to check.
  [[ "$NO_EMAIL" -eq 0 ]] || return 0

  [[ -x "$SENDMAIL_BIN" ]] || log "WARN" "$SENDMAIL_BIN not found — email will be skipped"
}

# json_escape <string>
# Prints <string> escaped for use inside a double-quoted JSON string.
# Handles backslash, double quote, the short escapes \n \t \r \b \f, and
# every remaining control character U+0001..U+001F as \u00XX. JSON forbids
# raw control characters inside strings, so leaving them in would make the
# findings line unparseable.
json_escape() {
  local s="$1" code hex ch
  # Backslash must go first so the escapes added below are not doubled.
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\t'/\\t}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\b'/\\b}"
  s="${s//$'\f'/\\f}"
  # Whatever control characters are still present get the generic \u00XX
  # form. Characters already rewritten above no longer occur in $s, so the
  # corresponding iterations are no-ops.
  for (( code = 1; code < 32; code++ )); do
    printf -v hex '%02x' "$code"
    printf -v ch "\\x${hex}"
    s="${s//"$ch"/\\u00${hex}}"
  done
  printf '%s' "$s"
}

# epoch_from_date <openssl-date-string> — portable date conversion
# openssl emits: "May 15 12:34:56 2026 GMT"
# GNU date understands that directly; BSD date needs an explicit format.
# Prints the epoch seconds on success, or an empty line when the string is
# blank or cannot be parsed. Always returns 0; callers test for empty output.
epoch_from_date() {
  local d="$1"
  # GNU `date -d ""` does not fail: it prints today's midnight. Reject blank
  # input up front so a missing date can never look like a valid one.
  if [[ -z "${d//[[:space:]]/}" ]]; then
    echo ""
    return 0
  fi
  # LC_ALL=C keeps the English month abbreviations parseable under any locale.
  LC_ALL=C date -d "$d" +%s 2>/dev/null && return
  LC_ALL=C date -j -f "%b %e %H:%M:%S %Y %Z" "$d" +%s 2>/dev/null && return
  echo ""
}

# extract_subject_cn <cert_path>
# Prints the CN value from the certificate's subject line, or nothing when
# the subject has no CN or openssl cannot read the file.
# Best effort: always returns 0. With `set -e` and `pipefail` active, a
# failing openssl stage would otherwise abort the script at the caller's
# assignment, before the caller can apply its own fallback value.
extract_subject_cn() {
  openssl x509 -in "$1" -noout -subject 2>/dev/null \
    | sed -n 's/.*CN[[:space:]]*=[[:space:]]*\([^,/]*\).*/\1/p' \
    | head -n 1 \
    || true
}

# extract_sans <cert_path> — prints the comma-separated SAN list
# Takes the line following the "X509v3 Subject Alternative Name" heading in
# `openssl x509 -text`, strips all whitespace and the "DNS:" prefixes.
# Prints nothing when the cert has no SAN extension or cannot be read.
# Best effort: always returns 0, so a failure here cannot abort the scan
# under `set -e` / `pipefail`.
extract_sans() {
  # awk deliberately keeps reading after the SAN line instead of exiting.
  # An early exit closes the pipe while openssl may still be writing, and the
  # resulting SIGPIPE would be reported as a pipeline failure by pipefail.
  openssl x509 -in "$1" -noout -text 2>/dev/null \
    | awk '
        found { next }
        /X509v3 Subject Alternative Name/ {
          if ((getline san) > 0) print san
          found = 1
        }' \
    | sed 's/[[:space:]]//g; s/DNS://g' \
    || true
}

# emit_finding <cert_path> <subject_cn> <sans> <days_left> <severity>
# Builds one JSON finding for an at-risk certificate and either prints it
# (DRY_RUN=1) or appends it to $FINDINGS_DIR/findings.jsonl.
# A negative <days_left> is reported as "EXPIRED N days ago".
# Also appends a one-line summary to DIGEST_LINES and sets HAS_P1=1 for P1
# findings, so the digest email can be assembled afterwards.
# Reads: SERVER_NAME, FINDINGS_DIR, DRY_RUN.  Writes: DIGEST_LINES, HAS_P1.
emit_finding() {
  local cert_path="$1" cn="$2" sans="$3" days="$4" sev="$5"
  local now_iso now_epoch finding_file id hash action status

  now_epoch="$(date +%s)"
  now_iso="$(date '+%Y-%m-%dT%H:%M:%S%z')"
  finding_file="$FINDINGS_DIR/findings.jsonl"

  # Stable per-cert id: the first 16 hex digits of the path's md5. When
  # md5sum is unavailable, fall back to a sanitised copy of the path. The
  # emptiness check makes the fallback work with or without pipefail.
  hash="$(printf '%s' "$cert_path" | md5sum 2>/dev/null | awk '{print $1}')" || hash=""
  hash="${hash:0:16}"
  if [[ -z "$hash" ]]; then
    hash="${cert_path//[^a-zA-Z0-9]/_}"
  fi
  id="ssl-expiry-${SERVER_NAME}-${hash}"

  if (( days < 0 )); then
    status="EXPIRED ${days#-} days ago"
  else
    status="expires in ${days} day(s)"
  fi

  action="Cert ${status}: ${cn} (covers: ${sans:-just CN}). Cert file: ${cert_path}. If AutoSSL is enabled, force renewal: whmapi1 start_autossl_check_for_user user=<owner>. Identify cert owner: grep -l '${cn}' /var/cpanel/ssl/installed/registry/* 2>/dev/null. If not AutoSSL-managed, renew via the original CA (Let's Encrypt: certbot renew; commercial: contact CA)."

  # Every string that lands inside the JSON document is escaped, including
  # the server name and id, which come from hostname/config and are not
  # guaranteed to be free of quotes or backslashes.
  local cn_esc sans_esc action_esc cert_esc server_esc status_esc id_esc
  cn_esc="$(json_escape "$cn")"
  sans_esc="$(json_escape "$sans")"
  action_esc="$(json_escape "$action")"
  cert_esc="$(json_escape "$cert_path")"
  server_esc="$(json_escape "$SERVER_NAME")"
  status_esc="$(json_escape "$status")"
  id_esc="$(json_escape "$id")"

  local json
  json=$(printf '{"ts":"%s","ts_epoch":%d,"module":"%s","server":"%s","severity":"%s","metric":"%s","subject_cn":"%s","sans":"%s","days_left":%d,"cert_path":"%s","status":"%s","id":"%s","recommended_action":"%s"}' \
    "$now_iso" "$now_epoch" "ssl-expiry" "$server_esc" "$sev" \
    "ssl_cert_expiry" "$cn_esc" "$sans_esc" "$days" "$cert_esc" \
    "$status_esc" "$id_esc" "$action_esc")

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN finding: %s\n' "$json"
  elif ! printf '%s\n' "$json" >> "$finding_file"; then
    # Keep going: the digest email is the primary alert channel, and it must
    # still go out when the findings file cannot be written.
    log "ERROR" "cannot append finding to $finding_file"
  fi

  DIGEST_LINES+=("[$sev] ${cn}  ${status}  (sans: ${sans:-none})")
  if [[ "$sev" == "P1" ]]; then HAS_P1=1; fi
}

# send_alert_email
# Sends one digest email covering every entry in DIGEST_LINES.
#   - No entries:        logs and returns 0 without sending.
#   - NO_EMAIL=1:        logs and returns 0 without sending.
#   - DRY_RUN=1:         prints the would-be email to stdout, returns 0.
#   - sendmail missing:  logs a warning, returns 0.
#   - sendmail fails:    logs an error, returns 1.
# The subject and X-CWP-Severity carry P1 when HAS_P1 is 1, otherwise P2.
# Reads: DIGEST_LINES, HAS_P1, NO_EMAIL, DRY_RUN, ALERT_EMAIL, SERVER_NAME,
#        SENDMAIL_BIN, EXPIRE_CRIT_DAYS, EXPIRE_WARN_DAYS, FINDINGS_DIR,
#        LOG_FILE.
send_alert_email() {
  local count="${#DIGEST_LINES[@]}"
  if (( count == 0 )); then
    log "INFO" "no certs in danger zone — email NOT sent"
    return
  fi
  if [[ "$NO_EMAIL" -eq 1 ]]; then
    log "INFO" "$count finding(s), --no-email — email skipped"
    return
  fi

  local subject body now_str findings_block sev_tag
  now_str="$(date '+%Y-%m-%d %H:%M:%S %Z')"
  findings_block="$(printf '%s\n' "${DIGEST_LINES[@]}")"
  sev_tag="P2"
  [[ "$HAS_P1" -eq 1 ]] && sev_tag="P1"
  subject="[CWP ${sev_tag}] ssl-expiry: ${count} cert(s) need renewal on ${SERVER_NAME}"

  # read -d '' returns non-zero at end of input, hence the `|| true`.
  # The two paths at the bottom use the configured locations so the email
  # stays correct when the config file overrides FINDINGS_DIR or LOG_FILE.
  IFS='' read -r -d '' body <<EOF || true
CloudWatch Pro — SSL Certificate Expiry

Server:    ${SERVER_NAME}
Run time:  ${now_str}
At-risk certs: ${count}
Thresholds: P1 if expired or <= ${EXPIRE_CRIT_DAYS} days, P2 if <= ${EXPIRE_WARN_DAYS} days

----- certs needing attention -----

${findings_block}

----- next steps -----

  1. AutoSSL-managed certs (most cPanel installs):
       Identify the owner:
           grep -l '<domain>' /var/cpanel/ssl/installed/registry/* 2>/dev/null
       Force a check:
           whmapi1 start_autossl_check_for_user user=<owner>
       Or fleet-wide:
           whmapi1 start_autossl_check_for_all_users

  2. Manual / commercial certs:
       Renew at the original CA. Replace the cert via WHM > SSL/TLS Manager.

  3. If a cert keeps failing to renew via AutoSSL, common causes:
       - HTTP-01 challenge can't reach /.well-known/acme-challenge/
         (check .htaccess, Cloudflare proxy, redirects)
       - Domain doesn't resolve to this server
       - cPanel hostname rate-limited at Let's Encrypt
       Check AutoSSL log:
           tail -200 /var/cpanel/logs/autossl.log

  4. After renewal, this script will not re-alert (cert no longer in danger zone).

This module is detect-only. CWP did NOT renew, replace, or modify any cert.

Findings file: ${FINDINGS_DIR}/findings.jsonl
SSL log:       ${LOG_FILE}
EOF

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN email to %s:\n  Subject: %s\n%s\n' "$ALERT_EMAIL" "$subject" "$body"
    return
  fi
  if [[ ! -x "$SENDMAIL_BIN" ]]; then
    log "WARN" "sendmail not available; alert email NOT sent"
    return
  fi

  # MIME-Version is required for the Content-Type header to be honoured; the
  # body contains non-ASCII characters, so declare 8bit transfer as well.
  if ! {
    printf 'To: %s\n' "$ALERT_EMAIL"
    printf 'From: cwp-agent@%s\n' "$SERVER_NAME"
    printf 'Subject: %s\n' "$subject"
    printf 'X-CWP-Module: ssl-expiry\n'
    printf 'X-CWP-Severity: %s\n' "$sev_tag"
    printf 'MIME-Version: 1.0\n'
    printf 'Content-Type: text/plain; charset=utf-8\n'
    printf 'Content-Transfer-Encoding: 8bit\n'
    printf '\n%s\n' "$body"
  } | "$SENDMAIL_BIN" -t -i; then
    # Leave a trace in the log instead of dying silently under `set -e`.
    log "ERROR" "$SENDMAIL_BIN failed; alert email to $ALERT_EMAIL NOT sent"
    return 1
  fi

  log "INFO" "alert email sent to $ALERT_EMAIL with $count finding(s)"
}

# ---- argument parsing ----
# Flags only set variables here; they take effect in the main section.
# Unknown flags, and --cert-dir without a value, print a message to stderr
# followed by the usage text, and exit with status 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)    DRY_RUN=1; shift ;;
    --verbose)    VERBOSE=1; shift ;;
    --no-email)   NO_EMAIL=1; shift ;;
    --cert-dir)
      # Without this check, `set -u` aborts with a bare "unbound variable"
      # error when --cert-dir is the last argument.
      if (( $# < 2 )); then
        printf '%s\n' '--cert-dir requires a path argument' >&2
        usage 2
      fi
      CERT_DIR_OVERRIDE="$2"
      shift 2
      ;;
    --version)    printf '%s %s\n' "$SCRIPT_NAME" "$VERSION"; exit 0 ;;
    -h|--help)    usage 0 ;;
    *)            printf 'unknown argument: %s\n' "$1" >&2; usage 2 ;;
  esac
done

# ---- main ----
# Load settings, scan every *.crt in $CERT_DIR, emit a finding for each cert
# that is expired or inside the warning window, then send one digest email.
load_config
ensure_dirs
preflight

# Digest state: filled by emit_finding, consumed by send_alert_email.
DIGEST_LINES=()
HAS_P1=0

log "INFO" "$SCRIPT_NAME v$VERSION starting (server=$SERVER_NAME, cert_dir=$CERT_DIR, dry_run=$DRY_RUN)"

if [[ ! -d "$CERT_DIR" ]]; then
  log "WARN" "cert dir $CERT_DIR does not exist — nothing to scan"
  exit 0
fi

now_epoch=$(date +%s)
total_certs=0
total_warn=0
total_crit=0
total_skipped=0

# Iterate every .crt file in the cert dir. nullglob makes an empty directory
# yield zero iterations instead of the literal pattern.
shopt -s nullglob
for cert_path in "$CERT_DIR"/*.crt; do
  [[ -r "$cert_path" ]] || continue
  total_certs=$((total_certs + 1))

  # With pipefail, a cert openssl cannot parse makes this pipeline fail. The
  # `|| enddate_raw=""` keeps `set -e` from aborting the whole scan, so the
  # skip branch below is actually reached.
  enddate_raw="$(openssl x509 -in "$cert_path" -noout -enddate 2>/dev/null | sed 's/notAfter=//')" \
    || enddate_raw=""
  if [[ -z "$enddate_raw" ]]; then
    log "WARN" "cannot read enddate from $cert_path — skipping"
    total_skipped=$((total_skipped + 1))
    continue
  fi

  end_epoch="$(epoch_from_date "$enddate_raw")"
  if [[ -z "$end_epoch" ]]; then
    log "WARN" "cannot parse enddate '$enddate_raw' from $cert_path — skipping"
    total_skipped=$((total_skipped + 1))
    continue
  fi

  secs_left=$(( end_epoch - now_epoch ))
  days_left=$(( secs_left / 86400 ))
  # Shell division truncates toward zero, so a cert that expired less than
  # 24 hours ago would come out as 0 and be reported as "expires in 0
  # day(s)". Force it negative so it is reported as expired.
  if (( secs_left < 0 && days_left == 0 )); then
    days_left=-1
  fi

  if (( days_left > EXPIRE_WARN_DAYS )); then
    log "INFO" "OK: $cert_path expires in $days_left days"
    continue
  fi

  # `|| true`: a lookup failure must not abort the scan under `set -e`; the
  # finding is still emitted with the fallback values below.
  cn="$(extract_subject_cn "$cert_path")" || true
  sans="$(extract_sans "$cert_path")" || true
  cn="${cn:-unknown}"

  if (( days_left < 0 )) || (( days_left <= EXPIRE_CRIT_DAYS )); then
    emit_finding "$cert_path" "$cn" "$sans" "$days_left" "P1"
    total_crit=$((total_crit + 1))
  else
    emit_finding "$cert_path" "$cn" "$sans" "$days_left" "P2"
    total_warn=$((total_warn + 1))
  fi
done
shopt -u nullglob

send_alert_email

# Skipped (unreadable/unparseable) certs are reported separately rather than
# being counted as ok.
log "INFO" "$SCRIPT_NAME complete: scanned=$total_certs critical=$total_crit warning=$total_warn skipped=$total_skipped ok=$((total_certs - total_crit - total_warn - total_skipped))"
exit 0
