#!/bin/bash
#
# cwp-mail-anomaly — outbound mail anomaly detector
#
# Part of CloudWatch Pro (CWP) v1. Runs on each cPanel server. Parses
# /var/log/exim_mainlog for the last hour, counts outbound messages per
# cPanel account, flags accounts above threshold. Emits findings to the
# CWP findings queue and sends an email alert to the operator.
#
# Detect-only. Does NOT suspend, block, or modify anything.
#
# Complements WHM > Mail > Mail Rate Limits (which silently blocks).
# This script gives you visibility + alerts on top.
#
# INSTALL (on each cPanel server, after Week 0 is done):
#   sudo install -d /opt/cwp/agent/modules/mail-anomaly /etc/cwp /var/cwp/state/mail-anomaly /var/cwp/findings /var/log/cwp
#   sudo install -m 0755 cwp-mail-anomaly /opt/cwp/agent/modules/mail-anomaly/cwp-mail-anomaly
#   sudo install -m 0644 config.example.conf /etc/cwp/mail-anomaly.conf
#   sudo $EDITOR /etc/cwp/mail-anomaly.conf   # set ALERT_EMAIL, SERVER_NAME
#   sudo crontab -l 2>/dev/null | { cat; cat cron.example; } | sudo crontab -
#
# USAGE:
#   cwp-mail-anomaly                    # normal scan, hourly via cron
#   cwp-mail-anomaly --dry-run          # scan + report, no findings file write, no email
#   cwp-mail-anomaly --verbose          # log everything to stderr
#   cwp-mail-anomaly --window 7200      # custom window in seconds (default 3600)
#   cwp-mail-anomaly --version
#
# EXIT CODES:
#   0 — scan completed (anomalies may or may not have been found)
#   1 — config or environment error (see /var/log/cwp/mail-anomaly.log)
#   2 — invalid arguments
#
# REQUIREMENTS:
#   bash 4+, awk, sort, uniq, date (GNU date — i.e., Linux), sendmail at $SENDMAIL_BIN (default /usr/sbin/sendmail)
#   read access to $EXIM_LOG_PATH, write access to $STATE_DIR + $FINDINGS_DIR

# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Identification strings: printed by --version and in the start/finish log lines.
VERSION="0.1.0"
SCRIPT_NAME="cwp-mail-anomaly"

# ---- defaults (overridden by /etc/cwp/mail-anomaly.conf) ----
# Path of the per-module config; the CWP_MAIL_ANOMALY_CONFIG environment
# variable takes precedence over the built-in path when it is set and non-empty.
CONFIG_FILE="${CWP_MAIL_ANOMALY_CONFIG:-/etc/cwp/mail-anomaly.conf}"
# Exim main log that scan_exim_log reads.
EXIM_LOG_PATH="/var/log/exim_mainlog"
# Directory holding the per-account cooldown.<account> timestamp files.
STATE_DIR="/var/cwp/state/mail-anomaly"
# Directory holding findings.jsonl, to which emit_finding appends.
FINDINGS_DIR="/var/cwp/findings"
# File that log() appends to.
LOG_FILE="/var/log/cwp/mail-anomaly.log"
# Recipient placed in the To: header of alert emails.
ALERT_EMAIL="root@localhost"
# Server label used in findings, subjects and the From: address; falls back
# to the plain `hostname` output when `hostname -f` fails.
SERVER_NAME="$(hostname -f 2>/dev/null || hostname)"
# Minimum message count (inclusive) within the scan window at which an
# account is reported.
THRESHOLD_HOURLY=50
# Length of the scan window in seconds; --window replaces this value.
WINDOW_SECONDS=3600
# Minutes during which a repeat alert for the same account is suppressed.
ALERT_COOLDOWN_MINUTES=60
# sendmail-compatible binary; it is invoked with "-t -i".
SENDMAIL_BIN="/usr/sbin/sendmail"

# Runtime flags, switched to 1 by --dry-run and --verbose respectively.
DRY_RUN=0
VERBOSE=0

# ---- helpers ----

# log <level> <message...>
#
# Appends "<timestamp> [<level>] <message>" to $LOG_FILE and mirrors the same
# line to stderr when $VERBOSE is 1 or the level is ERROR.
#
# Arguments: $1 - level tag (INFO, WARN, ERROR, ...); remaining args - message
# Globals:   LOG_FILE (read), VERBOSE (read)
# Notes:     writing to the log file is best-effort and never fails the caller.
log() {
  local level="$1"; shift
  local msg="$*"
  local ts line
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf -v line '%s [%s] %s' "$ts" "$level" "$msg"

  # Redirections are applied left to right, so "2>/dev/null" placed after
  # ">> file" does not hide a failure to open the file. Putting it on the
  # group silences that error too (e.g. the log directory does not exist yet).
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true

  if [[ "$VERBOSE" -eq 1 ]] || [[ "$level" == "ERROR" ]]; then
    printf '%s\n' "$line" >&2
  fi
}

# die <message...>
# Records the message at ERROR level through log(), then terminates the
# script with exit status 1.
die() {
  local reason="$*"
  log "ERROR" "$reason"
  exit 1
}

# usage [status]
#
# Prints the leading comment header of this script as help text and exits.
# The shebang line is skipped and printing stops at the first line that is
# not a comment, so no code leaks into the help output.
#
# Arguments: $1 - exit status (default 0)
# Outputs:   help text on stdout when status is 0, on stderr otherwise
usage() {
  local status="${1:-0}"
  local help_text
  help_text="$(awk '
    NR == 1 && /^#!/ { next }
    !/^#/            { exit }
    { sub(/^# ?/, ""); print }
  ' "$0")"

  if [[ "$status" == "0" ]]; then
    printf '%s\n' "$help_text"
  else
    printf '%s\n' "$help_text" >&2
  fi
  exit "$status"
}

# load_config
# Sources /etc/cwp/common.conf when readable, then the per-module file named
# by $CONFIG_FILE, so values in the module file win over the shared ones.
# A missing module file is only logged; the built-in defaults stay in effect.
# Always returns 0.
load_config() {
  # Shared CWP settings come first so the module config can override them.
  if [[ -r /etc/cwp/common.conf ]]; then
    # shellcheck source=/dev/null
    source /etc/cwp/common.conf
  fi

  if [[ ! -r "$CONFIG_FILE" ]]; then
    log "WARN" "no config at $CONFIG_FILE — using built-in defaults"
    return 0
  fi

  # shellcheck source=/dev/null
  source "$CONFIG_FILE"
  log "INFO" "loaded config from $CONFIG_FILE"
  return 0
}

# ensure_dirs
#
# Creates the state directory, the findings directory and the directory that
# holds the log file when they do not exist yet. Calls die() if a directory
# cannot be created.
#
# Globals: STATE_DIR, FINDINGS_DIR, LOG_FILE (all read)
ensure_dirs() {
  # Keep the loop variable local so a global named "d" is never clobbered.
  local d
  for d in "$STATE_DIR" "$FINDINGS_DIR" "$(dirname "$LOG_FILE")"; do
    if [[ ! -d "$d" ]]; then
      # "--" protects against a configured path that begins with a dash.
      mkdir -p -- "$d" 2>/dev/null || die "cannot create $d (run as root or pre-create)"
    fi
  done
}

# preflight
# Verifies the runtime environment before scanning: the Exim log must be
# readable and awk/date must be resolvable (die() otherwise). A sendmail
# binary that is not executable only produces a warning.
preflight() {
  if [[ ! -r "$EXIM_LOG_PATH" ]]; then
    die "cannot read $EXIM_LOG_PATH (need root or exim group)"
  fi

  if [[ ! -x "$SENDMAIL_BIN" ]]; then
    log "WARN" "$SENDMAIL_BIN not found — email alerts disabled"
  fi

  if ! command -v awk >/dev/null; then
    die "awk not found"
  fi
  if ! command -v date >/dev/null; then
    die "date not found"
  fi
}

# in_cooldown <account> -> 0 (yes, suppress) or 1 (no, alert)
#
# Reads the epoch timestamp stored in $STATE_DIR/cooldown.<account> and
# reports whether it lies within the last ALERT_COOLDOWN_MINUTES minutes.
#
# A missing, unreadable, empty or non-numeric state file counts as "not in
# cooldown": a damaged state file must never abort the scan or hide an alert.
# A timestamp in the future (clock stepped backwards) is treated the same way
# so alerts cannot be suppressed indefinitely.
#
# Globals: STATE_DIR (read), ALERT_COOLDOWN_MINUTES (read)
in_cooldown() {
  local account="$1"
  local cooldown_file="$STATE_DIR/cooldown.${account}"
  local last_alert="" now diff

  [[ -f "$cooldown_file" ]] || return 1

  # Only the first line matters; read failures are tolerated on purpose.
  { IFS= read -r last_alert < "$cooldown_file"; } 2>/dev/null || true
  [[ "$last_alert" =~ ^[0-9]+$ ]] || return 1

  now="$(date +%s)"
  # 10# forces base 10 so a value with leading zeros is not parsed as octal.
  diff=$(( now - 10#$last_alert ))
  if (( diff >= 0 && diff < ALERT_COOLDOWN_MINUTES * 60 )); then
    return 0
  fi
  return 1
}

# mark_cooldown <account>
# Overwrites $STATE_DIR/cooldown.<account> with the current epoch time, which
# in_cooldown later compares against.
mark_cooldown() {
  local account="$1"
  local stamp_path="${STATE_DIR}/cooldown.${account}"
  date +%s > "$stamp_path"
}

# scan_exim_log -> emits one line per hit: "<count> <account>"
#
# Counts message-received ("<=") lines in $EXIM_LOG_PATH whose timestamp is
# within the last WINDOW_SECONDS seconds, grouped by the submitting user, and
# prints the users whose count is at least THRESHOLD_HOURLY.
scan_exim_log() {
  local window_start_epoch window_start
  window_start_epoch=$(( $(date +%s) - WINDOW_SECONDS ))
  window_start="$(date -d "@$window_start_epoch" '+%Y-%m-%d %H:%M:%S')"

  log "INFO" "scanning $EXIM_LOG_PATH for outbound mail since $window_start (window ${WINDOW_SECONDS}s)"

  # Sample lines this parser is written for:
  #   2026-05-15 14:23:45 1abc-def-001 <= sender@x H=h A=dovecot_login:cpaneluser P=esmtpsa S=1234
  #   2026-05-15 14:23:45 1abc <= cpaneluser@host U=cpaneluser P=local S=1234
  #
  # The user is the second colon-separated part of the A= field, or the value
  # of the U= field, whichever appears first on the line. Log lines start with
  # "YYYY-MM-DD HH:MM:SS", so a plain string comparison against the cutoff
  # orders them chronologically.
  awk -v cutoff="$window_start" '
    BEGIN { skip["mailnull"]; skip["root"]; skip["cpanel"] }

    # drop lines older than the cutoff and lines that are not receptions
    $0 < cutoff             { next }
    index($0, " <= ") == 0  { next }

    {
      who = ""
      done = 0
      for (f = 4; f <= NF && !done; f++) {
        tag = substr($f, 1, 2)
        if (tag == "A=") {
          if (split(substr($f, 3), auth, ":") > 1) {
            who = auth[2]
            done = 1
          }
        } else if (tag == "U=") {
          who = substr($f, 3)
          done = 1
        }
      }
      if (who == "" || (who in skip)) next
      print who
    }
  ' "$EXIM_LOG_PATH" \
    | sort \
    | uniq -c \
    | awk -v t="$THRESHOLD_HOURLY" '$1 >= t { print $1, $2 }'
}

# _cwp_json_escape <string>
# Prints the string with backslash, double quote, newline, carriage return
# and tab escaped so it can be placed inside a JSON string literal.
_cwp_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

# emit_finding <count> <account>
#
# Builds one JSON object describing the anomaly and appends it as a single
# line to $FINDINGS_DIR/findings.jsonl. In dry-run mode the JSON is printed
# to stdout and nothing is written.
#
# Arguments: $1 - message count observed in the window
#            $2 - account (login) the messages were attributed to
# Globals:   SERVER_NAME, THRESHOLD_HOURLY, WINDOW_SECONDS, FINDINGS_DIR,
#            DRY_RUN (all read)
emit_finding() {
  local count="$1" account="$2"
  local now_iso now_epoch hour_tag finding_file id action json

  # Derive every timestamp from one clock reading so "ts", "ts_epoch" and the
  # hour bucket inside the id always agree, even across an hour boundary.
  now_epoch="$(date +%s)"
  now_iso="$(date -d "@${now_epoch}" '+%Y-%m-%dT%H:%M:%S%z')"
  hour_tag="$(date -d "@${now_epoch}" '+%Y%m%d%H')"
  id="mail-anomaly-${SERVER_NAME}-${account}-${hour_tag}"
  finding_file="$FINDINGS_DIR/findings.jsonl"

  action="Investigate /home/${account}/public_html for compromise. "
  action+="Inspect mail queue with: exim -bp | grep ${account}. "
  action+="If confirmed: WHM > Account Functions > Suspend Mail for ${account}."

  # JSON is assembled by hand (no jq dependency). Every free-form string is
  # escaped first: the account comes from log data (it can be a full mailbox
  # login such as user@domain) and SERVER_NAME comes from configuration, so
  # neither may be assumed free of quotes or backslashes.
  json=$(printf '{"ts":"%s","ts_epoch":%d,"module":"%s","server":"%s","severity":"%s","account":"%s","metric":"%s","value":%d,"threshold":%d,"window_seconds":%d,"id":"%s","recommended_action":"%s"}' \
    "$now_iso" "$now_epoch" "mail-anomaly" "$(_cwp_json_escape "$SERVER_NAME")" "P1" \
    "$(_cwp_json_escape "$account")" \
    "outbound_mail_per_hour" "$count" "$THRESHOLD_HOURLY" "$WINDOW_SECONDS" \
    "$(_cwp_json_escape "$id")" "$(_cwp_json_escape "$action")")

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN finding: %s\n' "$json"
    return
  fi

  printf '%s\n' "$json" >> "$finding_file"
  log "INFO" "finding written: account=$account count=$count id=$id"
}

# send_email_alert <count> <account>
#
# Composes the operator alert for one anomalous account and pipes it to
# "$SENDMAIL_BIN -t -i". In dry-run mode the message is printed to stdout
# instead of being sent.
#
# Arguments: $1 - message count observed in the window
#            $2 - account (login) the messages were attributed to
# Globals:   ALERT_EMAIL, SERVER_NAME, THRESHOLD_HOURLY, WINDOW_SECONDS,
#            ALERT_COOLDOWN_MINUTES, SENDMAIL_BIN, DRY_RUN (all read)
# Returns:   0 even when delivery fails. Email is best-effort: a broken MTA
#            is logged at ERROR level but must not abort the scan of the
#            remaining accounts.
send_email_alert() {
  local count="$1" account="$2"
  local subject body now_str

  now_str="$(date '+%Y-%m-%d %H:%M:%S %Z')"
  subject="[CWP P1] mail-anomaly: ${account} sending ${count} mails/hour on ${SERVER_NAME}"

  body=$(cat <<EOF
CloudWatch Pro — Outbound Mail Anomaly

Server:    ${SERVER_NAME}
Account:   ${account}
Metric:    outbound_mail_per_hour = ${count} (threshold: ${THRESHOLD_HOURLY})
Window:    last $((WINDOW_SECONDS / 60)) minutes
Severity:  P1
Detected:  ${now_str}

Recommended actions (in order):

  1. SSH to server:
       ssh root@${SERVER_NAME}

  2. Inspect mail queue for this account:
       exim -bp | grep ${account}

  3. Look at recent file activity in the account's web root:
       find /home/${account}/public_html -type f -mmin -120 -ls

  4. Check the account's recent SMTP auth log:
       grep '${account}' /var/log/exim_mainlog | tail -100

  5. If confirmed compromise:
       WHM > Account Functions > Suspend Mail for ${account}
       (account stays online for the website; only mail is frozen)

  6. After cleanup:
       WHM > Unsuspend Mail

  7. Investigate root cause: outdated WP plugin? stolen FTP creds?
     Use the daily phishing-kit + webshell sweep findings on this server.

This alert will not repeat for the same account within ${ALERT_COOLDOWN_MINUTES} minutes.

CWP detector ID: mail-anomaly-${SERVER_NAME}-${account}-$(date +%Y%m%d%H)
EOF
  )

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN email to %s:\n  Subject: %s\n%s\n' "$ALERT_EMAIL" "$subject" "$body"
    return
  fi

  if [[ ! -x "$SENDMAIL_BIN" ]]; then
    log "WARN" "sendmail not available; alert email NOT sent for $account"
    return
  fi

  # Testing the pipeline inside "if" keeps a sendmail failure from tripping
  # "set -e" / pipefail and terminating the whole run.
  if ! {
    printf 'To: %s\n' "$ALERT_EMAIL"
    printf 'From: cwp-agent@%s\n' "$SERVER_NAME"
    printf 'Subject: %s\n' "$subject"
    printf 'X-CWP-Module: mail-anomaly\n'
    printf 'X-CWP-Severity: P1\n'
    # The body contains non-ASCII UTF-8 text, so declare it as a MIME message.
    printf 'MIME-Version: 1.0\n'
    printf 'Content-Type: text/plain; charset=utf-8\n'
    printf 'Content-Transfer-Encoding: 8bit\n'
    printf '\n%s\n' "$body"
  } | "$SENDMAIL_BIN" -t -i; then
    log "ERROR" "sendmail failed; alert email NOT sent for $account"
    return 0
  fi

  log "INFO" "alert email sent to $ALERT_EMAIL for account=$account"
}

# ---- argument parsing ----
# Flags given on the command line are remembered separately and re-applied
# after load_config, so a value in a config file can never silently override
# an explicit command-line choice.
cli_dry_run=""
cli_verbose=""
cli_window=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)  cli_dry_run=1; DRY_RUN=1; shift ;;
    --verbose)  cli_verbose=1; VERBOSE=1; shift ;;
    --window)
      # A missing or non-numeric value is an argument error (exit 2), not an
      # "unbound variable" crash or a later arithmetic failure.
      if [[ $# -lt 2 || ! "${2-}" =~ ^[1-9][0-9]*$ ]]; then
        printf -- '--window requires a positive integer number of seconds\n' >&2
        usage 2
      fi
      cli_window="$2"; WINDOW_SECONDS="$2"; shift 2 ;;
    --version)  printf '%s %s\n' "$SCRIPT_NAME" "$VERSION"; exit 0 ;;
    -h|--help)  usage 0 ;;
    *)          printf 'unknown argument: %s\n' "$1" >&2; usage 2 ;;
  esac
done

# ---- main ----
load_config

# Command line wins over configuration files.
if [[ -n "$cli_dry_run" ]]; then DRY_RUN=1; fi
if [[ -n "$cli_verbose" ]]; then VERBOSE=1; fi
if [[ -n "$cli_window" ]]; then WINDOW_SECONDS="$cli_window"; fi

ensure_dirs

# These settings are used in shell arithmetic and awk comparisons; reject
# anything that is not a plain decimal integer as a configuration error.
for setting in THRESHOLD_HOURLY WINDOW_SECONDS ALERT_COOLDOWN_MINUTES; do
  if [[ ! "${!setting-}" =~ ^(0|[1-9][0-9]*)$ ]]; then
    die "$setting must be a non-negative integer (got '${!setting-}')"
  fi
done

preflight

log "INFO" "$SCRIPT_NAME v$VERSION starting (server=$SERVER_NAME, threshold=${THRESHOLD_HOURLY}/hr, window=${WINDOW_SECONDS}s, dry_run=$DRY_RUN)"

# One "<count> <account>" line per account at or above the threshold.
found=0
while read -r count account; do
  [[ -z "$account" ]] && continue
  found=$((found + 1))

  if in_cooldown "$account"; then
    log "INFO" "account=$account count=$count — in cooldown, suppressing alert"
    continue
  fi

  emit_finding "$count" "$account"
  send_email_alert "$count" "$account"

  # A dry run must leave no state behind, so the cooldown is not recorded.
  if [[ "$DRY_RUN" -eq 0 ]]; then
    mark_cooldown "$account"
  fi
done < <(scan_exim_log)

log "INFO" "$SCRIPT_NAME complete: $found anomalous account(s)"
exit 0
