#!/bin/bash
#
# cwp-login-failures — login brute-force detector
#
# Part of CloudWatch Pro (CWP) v1. Runs every 15 minutes on each cPanel server.
# Tails three log sources for failed-auth events:
#   1. /var/log/secure              — SSH password failures
#   2. /usr/local/cpanel/logs/login_log — cPanel + WHM login failures
#   3. /var/log/exim_mainlog        — SMTP-auth failures (dovecot_login)
#
# Buckets failures by source IP. If any IP exceeds the per-source threshold
# within one run window, emits a finding + sends a digest email.
#
# Detect-only. Does NOT block IPs, modify CSF, or change firewall rules.
# Operator decides what to do with each finding (typically: csf -d <ip>).
#
# Complements cphulkd (which blocks cPanel/WHM brute force silently and only
# covers those surfaces). This script gives you visibility across SSH, WHM,
# cPanel, AND SMTP — and keeps an audit trail of every brute-force attempt.
#
# OFFSET TRACKING:
#   Each log is tracked by byte offset in /var/cwp/state/login-failures/.
#   Each run reads only new lines since the last run, then updates the offset.
#   On log rotation (file shrunk), offset resets and the next run starts fresh.
#
# INSTALL (on each cPanel server, after Week 0 is done):
#   sudo install -d /opt/cwp/agent/modules/login-failures \
#                   /etc/cwp \
#                   /var/cwp/state/login-failures \
#                   /var/cwp/findings \
#                   /var/log/cwp
#   sudo install -m 0755 cwp-login-failures /opt/cwp/agent/modules/login-failures/
#   sudo install -m 0644 config.example.conf /etc/cwp/login-failures.conf
#   sudo $EDITOR /etc/cwp/login-failures.conf   # set ALERT_EMAIL, SERVER_NAME
#   sudo crontab -l 2>/dev/null | { cat; cat cron.example; } | sudo crontab -
#
# USAGE:
#   cwp-login-failures                 # normal run
#   cwp-login-failures --dry-run       # report only, no findings/email/offset update
#   cwp-login-failures --verbose       # log to stderr too
#   cwp-login-failures --no-email      # write findings, skip digest email
#   cwp-login-failures --reset         # reset all offsets to current EOF (skip backlog)
#   cwp-login-failures --version
#
# EXIT CODES:
#   0 — run completed
#   1 — config or environment error
#   2 — invalid arguments

# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

VERSION="0.1.0"
SCRIPT_NAME="cwp-login-failures"

# ---- defaults (overridden by /etc/cwp/login-failures.conf) ----
# The config path itself can be changed through the
# CWP_LOGIN_FAILURES_CONFIG environment variable.
CONFIG_FILE="${CWP_LOGIN_FAILURES_CONFIG:-/etc/cwp/login-failures.conf}"
STATE_DIR="/var/cwp/state/login-failures"
FINDINGS_DIR="/var/cwp/findings"
LOG_FILE="/var/log/cwp/login-failures.log"
ALERT_EMAIL="root@localhost"
# Fully-qualified hostname when available, plain hostname otherwise.
SERVER_NAME="$(hostname -f 2>/dev/null || hostname)"
SENDMAIL_BIN="/usr/sbin/sendmail"

# Sources: SSH, cPanel, Exim. Override paths in config if your install differs.
SSH_LOG="/var/log/secure"
CPANEL_LOG="/usr/local/cpanel/logs/login_log"
EXIM_LOG="/var/log/exim_mainlog"

# Thresholds: failures in one run window (default cron: 15 min) per IP.
# THRESHOLD_CPANEL is applied to both the "cpanel" and "whm" buckets.
THRESHOLD_SSH=10
THRESHOLD_CPANEL=5
THRESHOLD_EXIM=20

# Per-IP cooldown — don't re-alert on the same IP+source within N minutes.
ALERT_COOLDOWN_MINUTES=60

# Run-mode flags; each is switched to 1 by the matching command-line option
# (--dry-run, --verbose, --no-email, --reset).
DRY_RUN=0
VERBOSE=0
NO_EMAIL=0
RESET=0

# ---- helpers ----

# log <level> <message...>
# Appends "<timestamp> [<level>] <message>" to $LOG_FILE and, when VERBOSE=1
# or the level is ERROR, repeats the same line on stderr.
# Writing to the log file is best-effort: a missing or unwritable $LOG_FILE
# never fails the caller and never prints anything.
log() {
  local level="$1"; shift
  local msg="$*"
  local ts line
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf -v line '%s [%s] %s' "$ts" "$level" "$msg"
  # Redirections are applied left to right, so a trailing 2>/dev/null on the
  # printf itself comes too late to hide the shell's own "No such file or
  # directory" message when $LOG_FILE cannot be opened (this happens when
  # logging before the log directory exists). Silencing stderr on the
  # enclosing group covers that diagnostic as well.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
  if [[ "$VERBOSE" -eq 1 ]] || [[ "$level" == "ERROR" ]]; then
    printf '%s\n' "$line" >&2
  fi
}

# die <message...>
# Records the message at ERROR level (which log also echoes to stderr) and
# terminates the script with exit status 1.
die() {
  local reason="$*"
  log "ERROR" "$reason"
  exit 1
}

# usage [exit_code]
# Prints the comment header at the top of this script as help text, then
# exits with exit_code (default 0).
# The header is located structurally rather than by a fixed line count: the
# shebang is skipped and printing stops at the first line that is not a
# comment. A fixed "first 40 lines" window silently drops any option or exit
# code documented further down the header.
usage() {
  awk '
    NR == 1 && /^#!/ { next }
    !/^#/            { exit }
    { sub(/^# ?/, ""); print }
  ' "$0"
  exit "${1:-0}"
}

# load_config
# Sources /etc/cwp/common.conf (if readable) and then $CONFIG_FILE (if
# readable), so values in the module config win over the shared ones.
# A missing module config is not an error: a WARN is logged and the built-in
# defaults stay in effect. Always returns 0.
load_config() {
  # Shared CWP defaults (ALERT_EMAIL, SERVER_NAME, etc.) come first.
  if [[ -r /etc/cwp/common.conf ]]; then
    # shellcheck source=/dev/null
    . /etc/cwp/common.conf
  fi

  if [[ ! -r "$CONFIG_FILE" ]]; then
    log "WARN" "no config at $CONFIG_FILE — using built-in defaults"
    return 0
  fi

  # shellcheck source=/dev/null
  . "$CONFIG_FILE"
  log "INFO" "loaded config from $CONFIG_FILE"
  return 0
}

# ensure_dirs
# Creates $STATE_DIR, $FINDINGS_DIR and the directory holding $LOG_FILE when
# they do not exist yet. Calls die (exit 1) if one of them cannot be created.
ensure_dirs() {
  # Loop variable is local so the function does not clobber a caller's $d.
  local d
  for d in "$STATE_DIR" "$FINDINGS_DIR" "$(dirname -- "$LOG_FILE")"; do
    if [[ ! -d "$d" ]]; then
      mkdir -p -- "$d" 2>/dev/null || die "cannot create $d (run as root or pre-create)"
    fi
  done
}

# preflight
# Verifies that the external tools this script shells out to are on PATH and
# calls die (exit 1) on the first one missing. head, cat and date are checked
# too because the offset reader, state helpers and timestamps depend on them.
# A missing sendmail binary only produces a WARN, because the digest email is
# optional; the check is skipped entirely when NO_EMAIL=1.
preflight() {
  # Loop variable is local so the function does not clobber a caller's $cmd.
  local cmd
  for cmd in awk sed grep stat tail head cat date; do
    command -v "$cmd" >/dev/null || die "$cmd not found"
  done
  if [[ "$NO_EMAIL" -eq 0 ]] && [[ ! -x "$SENDMAIL_BIN" ]]; then
    log "WARN" "$SENDMAIL_BIN not found — digest email will be skipped"
  fi
}

# file_size_portable <path>
# Prints the size of <path> in bytes. Tries the GNU stat syntax first, then
# the BSD syntax; prints 0 when neither call succeeds (e.g. missing file).
file_size_portable() {
  local target="$1" bytes
  if bytes="$(stat -c %s "$target" 2>/dev/null)"; then
    printf '%s\n' "$bytes"
  elif bytes="$(stat -f %z "$target" 2>/dev/null)"; then
    printf '%s\n' "$bytes"
  else
    echo 0
  fi
}

# read_offset <state_key>
# Prints the byte offset stored in $STATE_DIR/offset.<state_key>.
# Prints 0 when the file is missing, unreadable, empty, or does not contain a
# plain non-negative integer, so callers can always use the result in shell
# arithmetic. The value is normalised to base 10 so a stored "0042" is not
# later read as octal.
read_offset() {
  local key="$1"
  local f="$STATE_DIR/offset.${key}"
  local raw=""
  if [[ -f "$f" ]]; then
    raw="$(cat "$f" 2>/dev/null)" || raw=""
  fi
  if [[ "$raw" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$(( 10#$raw ))"
  else
    echo 0
  fi
}

# write_offset <state_key> <new_offset>
# Overwrites $STATE_DIR/offset.<state_key> with <new_offset>, written without
# a trailing newline.
write_offset() {
  local state_key="$1"
  local new_offset="$2"
  local dest="${STATE_DIR}/offset.${state_key}"
  printf '%s' "$new_offset" > "$dest"
}

# in_cooldown <state_key>
# Returns 0 when the last alert recorded in $STATE_DIR/cooldown.<state_key>
# is younger than ALERT_COOLDOWN_MINUTES (caller should suppress the alert),
# and 1 otherwise (no stamp file, or the cooldown has expired).
# An empty or non-numeric stamp file is treated as epoch 0, i.e. expired,
# instead of breaking the arithmetic below.
in_cooldown() {
  local key="$1"
  local f="$STATE_DIR/cooldown.${key}"
  [[ -f "$f" ]] || return 1
  local last now
  last="$(cat "$f" 2>/dev/null)" || last=""
  [[ "$last" =~ ^[0-9]+$ ]] || last=0
  now="$(date +%s)"
  # 10# forces base 10 so a stamp with leading zeros is not parsed as octal.
  if (( now - 10#$last < ALERT_COOLDOWN_MINUTES * 60 )); then
    return 0
  fi
  return 1
}

# mark_cooldown <state_key>
# Stamps $STATE_DIR/cooldown.<state_key> with the current Unix epoch time,
# replacing any earlier stamp.
mark_cooldown() {
  local stamp_file="${STATE_DIR}/cooldown.${1}"
  date +%s > "$stamp_file"
}

# json_escape <string>
# Prints <string> escaped for use inside a double-quoted JSON string value
# (the surrounding quotes are not added).
# Backslash and double quote get a backslash prefix; newline, tab, carriage
# return, backspace and form feed use their short escapes; every other
# control character below 0x20 is written as \u00XX, because JSON does not
# allow raw control characters inside strings.
json_escape() {
  local s="$1" code hex ctl
  # Backslash must be handled first so later escapes are not doubled.
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\t'/\\t}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\b'/\\b}"
  s="${s//$'\f'/\\f}"
  # Characters already rewritten above no longer occur in $s, so looping over
  # the whole 0x01-0x1f range only touches the remaining control characters.
  for (( code = 1; code < 32; code++ )); do
    printf -v hex '%04x' "$code"
    printf -v ctl "\\x${hex:2}"
    s="${s//"$ctl"/\\u${hex}}"
  done
  printf '%s' "$s"
}

# tail_new <log_path> <state_key>
# Prints the complete lines appended to <log_path> since the offset stored
# for <state_key>, then stores the new offset (skipped when DRY_RUN=1 or
# RESET=1).
#   - Unreadable log: logs a WARN, prints nothing, returns 0.
#   - File smaller than the stored offset (rotation/truncation): reading
#     restarts at byte 0.
#   - A trailing line that has no newline yet is still being written by the
#     logger. It is held back and the offset stops just before it, so the
#     next run sees that line whole instead of the two runs each getting an
#     unparseable half.
tail_new() {
  local log_path="$1" key="$2"
  [[ -r "$log_path" ]] || { log "WARN" "cannot read $log_path — skipping"; return 0; }

  local cur_size last_off
  cur_size="$(file_size_portable "$log_path")"
  last_off="$(read_offset "$key")"

  if (( cur_size < last_off )); then
    log "INFO" "$key: log rotated (size $cur_size < offset $last_off) — resetting"
    last_off=0
  fi

  if (( cur_size == last_off )); then
    return 0  # no new data
  fi

  # Look at the last byte of the snapshot. The appended "x" keeps command
  # substitution from stripping a trailing newline.
  local last_byte partial_len
  last_byte="$(tail -c "+${cur_size}" "$log_path" 2>/dev/null | head -c 1; printf 'x')"
  if [[ "$last_byte" != $'\nx' ]]; then
    # Length in bytes of the unterminated tail of the new chunk.
    partial_len="$(tail -c "+$((last_off + 1))" "$log_path" 2>/dev/null \
      | head -c "$(( cur_size - last_off ))" | tail -n 1 | wc -c)"
    cur_size=$(( cur_size - partial_len ))
    if (( cur_size <= last_off )); then
      return 0  # nothing but a partial line so far
    fi
  fi

  # Read from last_off up to the end of the last complete line.
  local bytes_to_read=$(( cur_size - last_off ))
  tail -c "+$((last_off + 1))" "$log_path" 2>/dev/null | head -c "$bytes_to_read"

  # Update offset (unless dry-run)
  if [[ "$DRY_RUN" -eq 0 ]] && [[ "$RESET" -eq 0 ]]; then
    write_offset "$key" "$cur_size"
  fi
}

# parse_ssh_failures <log_text>
# Filters <log_text> down to sshd lines containing "Failed password" or
# "Invalid user" and prints one "<ip><TAB><user>" record per line.
# The IP is the word after the last "from" on the line; the user is the word
# after "for" (or after "for invalid user"), or after "Invalid user".
# Lines without a "from <ip>" part produce no output. Example inputs:
#   sshd[12345]: Failed password for root from 1.2.3.4 port 22 ssh2
#   sshd[12345]: Failed password for invalid user admin from 1.2.3.4 port 22 ssh2
#   sshd[12345]: Invalid user admin from 1.2.3.4 port 22
parse_ssh_failures() {
  local data="$1"
  printf '%s' "$data" \
    | grep -E 'sshd\[[0-9]+\]:' \
    | grep -E '(Failed password|Invalid user)' \
    | awk '
        {
          addr = ""; who = ""
          for (n = 1; n <= NF; n++) {
            nxt = n + 1
            if ($n == "from" && nxt <= NF) addr = $nxt
            if ($n == "for" && nxt <= NF) who = ($nxt == "invalid") ? $(n + 3) : $nxt
            if ($n == "Invalid" && $nxt == "user" && n + 2 <= NF) who = $(n + 2)
          }
          if (addr != "") print addr "\t" who
        }'
}

# parse_cpanel_failures <log_text>
# Keeps the lines of <log_text> containing "FAILED LOGIN" and prints one
# "<ip><TAB><user><TAB><service>" record per line, where ip is the first
# field, user the third, and service the word following "LOGIN" with colons
# removed (empty when "LOGIN" is the last word). Lines whose first field is
# "-" are dropped.
# Expected line format:
#   1.2.3.4 - root [05/15/2026:12:34:56 -0500] "FAILED LOGIN whostmgrd: ..."
parse_cpanel_failures() {
  local data="$1"
  printf '%s' "$data" \
    | grep -E 'FAILED LOGIN' \
    | awk '
        {
          svc = ""
          for (n = 1; n < NF; n++) {
            if ($n == "LOGIN") {
              svc = $(n + 1)
              gsub(/:/, "", svc)
              break
            }
          }
          if ($1 != "" && $1 != "-") print $1 "\t" $3 "\t" svc
        }'
}

# parse_exim_failures <log_text>
# Keeps the lines of <log_text> containing "authenticator failed for" and
# prints the client IP of each, one per line. The IP is taken from the first
# whitespace-separated token that is a bracketed address, in any of these
# forms:
#   [1.2.3.4]   [1.2.3.4]:   [1.2.3.4]:54321   [1.2.3.4]:54321:
# IPv6 addresses ([2001:db8::1] and the same suffixes) are accepted as well.
# A bracketed HELO name such as ([9.9.9.9]) is not matched because its token
# starts with "(".
# Example line:
#   2026-05-15 12:34:56 dovecot_login authenticator failed for (host) [1.2.3.4]: 535 ...
parse_exim_failures() {
  local data="$1"
  printf '%s' "$data" | grep -E 'authenticator failed for' | \
    awk '{
      ip = ""
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^\[([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[0-9A-Fa-f]*:[0-9A-Fa-f:.]+)\](:[0-9]+)?:?$/) {
          ip = $i
          sub(/^\[/, "", ip)      # drop the opening bracket
          sub(/\].*$/, "", ip)    # drop the closing bracket and any :port suffix
          break
        }
      }
      if (ip != "") print ip
    }'
}

# Failure buckets: one entry per (source, IP) pair, held in parallel arrays
# that share an index — entry i is described by BUCKET_SOURCES[i],
# BUCKET_IPS[i], BUCKET_COUNTS[i] and BUCKET_USERS[i]. bucket_add fills them;
# the main loop reads them back when comparing counts against thresholds.
# Parallel arrays are used instead of an associative array so the script
# stays usable on Bash 3.
declare -a BUCKET_IPS=()       # source IP of the bucket
declare -a BUCKET_COUNTS=()    # number of failures seen for that IP + source
declare -a BUCKET_USERS=()     # attempted usernames, stored as "|u1|u2|" ("" if none)
declare -a BUCKET_SOURCES=()   # source label passed to bucket_add (ssh, whm, cpanel, exim)

# bucket_add <source> <ip> [user]
# Records one failure for the (source, ip) pair in the BUCKET_* parallel
# arrays: bumps the counter of an existing bucket or appends a new one.
# Usernames are remembered per bucket as a pipe-delimited list ("|u1|u2|"),
# without duplicates and capped at max_users distinct names. A bucket that
# was created without a username gets a properly delimited list as soon as
# the first name arrives, so the duplicate check keeps working for it.
bucket_add() {
  local source="$1" ip="$2" user="${3:-}"
  local -r max_users=5
  local i existing separators

  for ((i = 0; i < ${#BUCKET_IPS[@]}; i++)); do
    [[ "${BUCKET_SOURCES[$i]}" == "$source" && "${BUCKET_IPS[$i]}" == "$ip" ]] || continue

    BUCKET_COUNTS[$i]=$(( BUCKET_COUNTS[$i] + 1 ))
    existing="${BUCKET_USERS[$i]}"
    if [[ -n "$user" ]]; then
      if [[ -z "$existing" ]]; then
        BUCKET_USERS[$i]="|${user}|"
      elif [[ "$existing" != *"|$user|"* ]]; then
        # "|a|b|" holds one more pipe than it holds names; counting pipes in
        # the shell avoids forking a process for every log line.
        separators="${existing//[^|]/}"
        if (( ${#separators} - 1 < max_users )); then
          BUCKET_USERS[$i]="${existing}${user}|"
        fi
      fi
    fi
    return 0
  done

  # First failure for this (source, ip) pair.
  BUCKET_SOURCES+=("$source")
  BUCKET_IPS+=("$ip")
  BUCKET_COUNTS+=(1)
  if [[ -n "$user" ]]; then
    BUCKET_USERS+=("|${user}|")
  else
    BUCKET_USERS+=("")
  fi
}

# emit_finding <source> <ip> <count> <users> <threshold>
# Builds one JSON finding and appends it to $FINDINGS_DIR/findings.jsonl
# (or prints it prefixed with "DRY-RUN finding:" when DRY_RUN=1), then adds a
# human-readable summary line to the global DIGEST_LINES array.
#   <users> is the pipe-delimited list kept by bucket_add ("|u1|u2|" or "").
# Severity is P1 for every SSH finding and for any finding where the exact
# username "root" was attempted; everything else is P2.
# Every string placed in the JSON goes through json_escape, including the
# server name and the values taken from log lines.
emit_finding() {
  local source="$1" ip="$2" count="$3" users="$4" threshold="$5"
  local now_iso now_epoch finding_file id action sev users_trim users_clean

  now_epoch="$(date +%s)"
  now_iso="$(date '+%Y-%m-%dT%H:%M:%S%z')"
  id="login-failures-${source}-${SERVER_NAME}-${ip}-$(date +%Y%m%d%H%M)"
  finding_file="$FINDINGS_DIR/findings.jsonl"

  # Strip the leading/trailing pipe, then turn the list into "u1, u2".
  users_trim="${users#|}"
  users_trim="${users_trim%|}"
  users_clean="${users_trim//|/, }"

  # Match "root" as a whole list entry; a substring test would also flag
  # names such as "groot" or "chroot".
  if [[ "$source" == "ssh" ]] || [[ "|${users_trim}|" == *"|root|"* ]]; then
    sev="P1"
  else
    sev="P2"
  fi

  action="Investigate auth log on ${SERVER_NAME}. To block this IP at the firewall: csf -d ${ip} (or 'csf -td ${ip} 86400' for a 24-hour block). To unblock later: csf -dr ${ip}."

  local action_esc users_esc server_esc source_esc ip_esc id_esc
  action_esc="$(json_escape "$action")"
  users_esc="$(json_escape "${users_clean:-unknown}")"
  server_esc="$(json_escape "$SERVER_NAME")"
  source_esc="$(json_escape "$source")"
  ip_esc="$(json_escape "$ip")"
  id_esc="$(json_escape "$id")"

  local json
  json=$(printf '{"ts":"%s","ts_epoch":%d,"module":"%s","server":"%s","severity":"%s","source":"%s","src_ip":"%s","metric":"%s","value":%d,"threshold":%d,"users_attempted":"%s","id":"%s","recommended_action":"%s"}' \
    "$now_iso" "$now_epoch" "login-failures" "$server_esc" "$sev" "$source_esc" \
    "$ip_esc" "auth_failures_per_window" "$count" "$threshold" \
    "$users_esc" "$id_esc" "$action_esc")

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN finding: %s\n' "$json"
  else
    printf '%s\n' "$json" >> "$finding_file"
  fi

  DIGEST_LINES+=("[$sev] ${source}  ip=${ip}  failures=${count}  users={${users_clean:-?}}")
}

# send_digest_email
# Mails one digest covering every line collected in DIGEST_LINES.
# Returns 0 without sending when there are no findings, when NO_EMAIL=1, or
# when $SENDMAIL_BIN is not executable; with DRY_RUN=1 the message is printed
# to stdout instead of being sent.
# Returns 1 (after logging an ERROR) when the sendmail command itself fails,
# so a failed delivery is never reported as sent.
# The findings-file path shown in the body follows $FINDINGS_DIR, so it stays
# correct when that directory is changed in the config.
send_digest_email() {
  local count="${#DIGEST_LINES[@]}"
  if (( count == 0 )); then
    log "INFO" "no new brute-force findings — digest email NOT sent"
    return
  fi

  if [[ "$NO_EMAIL" -eq 1 ]]; then
    log "INFO" "$count new findings, --no-email set — digest skipped"
    return
  fi

  local subject body now_str findings_block
  now_str="$(date '+%Y-%m-%d %H:%M:%S %Z')"
  subject="[CWP] login-failures: ${count} brute-force IP(s) on ${SERVER_NAME}"

  findings_block="$(printf '%s\n' "${DIGEST_LINES[@]}")"

  # Build body with read -d '' (avoids the $(cat <<EOF) apostrophe-eating gotcha).
  # read returns non-zero at end of input, hence the "|| true".
  IFS='' read -r -d '' body <<EOF || true
CloudWatch Pro — Login Failure Tracker

Server:    ${SERVER_NAME}
Run time:  ${now_str}
New brute-force findings: ${count}

----- findings -----

${findings_block}

----- recommended actions -----

For each P1 finding:
  1. Check the source IP on AbuseIPDB:
       https://www.abuseipdb.com/check/<ip>

  2. Block the IP at the firewall (CSF):
       csf -d <ip>                       # permanent block
       csf -td <ip> 86400                # temporary 24-hour block

  3. To unblock later:
       csf -dr <ip>

  4. Verify cphulkd is running (covers cPanel/WHM brute force):
       whmapi1 get_hulk_status

  5. If many IPs are hitting at once (distributed attack):
       Consider Cloudflare Under Attack Mode for the affected zone, or
       enable the Cloudflare WAF managed ruleset.

This module is detect-only. CWP did NOT block any IP.

Findings file:  ${FINDINGS_DIR}/findings.jsonl
Per-IP cooldown: ${ALERT_COOLDOWN_MINUTES} minutes (no re-alert on same IP within window)
EOF

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN digest email to %s:\n  Subject: %s\n%s\n' "$ALERT_EMAIL" "$subject" "$body"
    return
  fi

  if [[ ! -x "$SENDMAIL_BIN" ]]; then
    log "WARN" "sendmail not available; digest email NOT sent"
    return
  fi

  # -t takes the recipients from the headers; -i keeps a lone "." line in the
  # body from ending the message early.
  if ! {
    printf 'To: %s\n' "$ALERT_EMAIL"
    printf 'From: cwp-agent@%s\n' "$SERVER_NAME"
    printf 'Subject: %s\n' "$subject"
    printf 'X-CWP-Module: login-failures\n'
    printf 'Content-Type: text/plain; charset=utf-8\n'
    printf '\n%s\n' "$body"
  } | "$SENDMAIL_BIN" -t -i; then
    log "ERROR" "$SENDMAIL_BIN failed; digest email to $ALERT_EMAIL NOT sent ($count finding(s) are in ${FINDINGS_DIR}/findings.jsonl)"
    return 1
  fi

  log "INFO" "digest email sent to $ALERT_EMAIL with $count finding(s)"
}

# ---- argument parsing ----
# No option takes a value, so each pass inspects $1 and then drops it.
# --version, -h/--help and any unknown argument end the script right away
# (exit status 0, 0 and 2 respectively).
until [[ $# -eq 0 ]]; do
  case "$1" in
    -h|--help)  usage 0 ;;
    --version)  printf '%s %s\n' "$SCRIPT_NAME" "$VERSION"; exit 0 ;;
    --dry-run)  DRY_RUN=1 ;;
    --verbose)  VERBOSE=1 ;;
    --no-email) NO_EMAIL=1 ;;
    --reset)    RESET=1 ;;
    *)          printf 'unknown argument: %s\n' "$1" >&2; usage 2 ;;
  esac
  shift
done

# ---- main ----
# The config is sourced first, so the directory creation and tool checks
# that follow already see any paths it overrides.
load_config
ensure_dirs
preflight

# Summary lines for the digest email: appended by emit_finding, read by
# send_digest_email.
DIGEST_LINES=()

log "INFO" "$SCRIPT_NAME v$VERSION starting (server=$SERVER_NAME, dry_run=$DRY_RUN, reset=$RESET)"

# --reset mode: snap all offsets to current EOF and exit (used after install).
# Each entry below is "<state_key>:<log_path>".
#   - A log that cannot be read keeps its old offset; this is logged as a
#     WARN so the operator knows the backlog for that source was not skipped.
#   - Combined with --dry-run, nothing is written: the run only logs what the
#     offsets would be set to, in line with --dry-run meaning "no offset
#     update".
if [[ "$RESET" -eq 1 ]]; then
  for src in "ssh:$SSH_LOG" "cpanel:$CPANEL_LOG" "exim:$EXIM_LOG"; do
    key="${src%%:*}"
    path="${src#*:}"
    if [[ ! -r "$path" ]]; then
      log "WARN" "reset: cannot read $path — $key offset left unchanged"
      continue
    fi
    sz="$(file_size_portable "$path")"
    if [[ "$DRY_RUN" -eq 1 ]]; then
      log "INFO" "dry-run: would reset $key offset to $sz ($path)"
      continue
    fi
    write_offset "$key" "$sz"
    log "INFO" "reset $key offset to $sz ($path)"
  done
  log "INFO" "reset complete; skipping scan"
  exit 0
fi

# Each source follows the same steps: fetch the text appended since the last
# run with tail_new, feed it to the matching parser, and add every parsed
# record to the failure buckets. Records with an empty IP are ignored.

# --- SSH ---
ssh_data="$(tail_new "$SSH_LOG" "ssh")"
if [[ -n "$ssh_data" ]]; then
  while IFS=$'\t' read -r ip user; do
    if [[ -n "$ip" ]]; then
      bucket_add "ssh" "$ip" "$user"
    fi
  done < <(parse_ssh_failures "$ssh_data")
fi

# --- cPanel + WHM ---
# Both services share one log; failures against whostmgrd are bucketed as
# "whm", everything else as "cpanel".
cpanel_data="$(tail_new "$CPANEL_LOG" "cpanel")"
if [[ -n "$cpanel_data" ]]; then
  while IFS=$'\t' read -r ip user service; do
    if [[ -n "$ip" ]]; then
      case "$service" in
        whostmgrd) bucket_add "whm" "$ip" "$user" ;;
        *)         bucket_add "cpanel" "$ip" "$user" ;;
      esac
    fi
  done < <(parse_cpanel_failures "$cpanel_data")
fi

# --- Exim ---
# The Exim parser yields IPs only, so these buckets carry no username.
exim_data="$(tail_new "$EXIM_LOG" "exim")"
if [[ -n "$exim_data" ]]; then
  while IFS=$'\t' read -r ip; do
    if [[ -n "$ip" ]]; then
      bucket_add "exim" "$ip" ""
    fi
  done < <(parse_exim_failures "$exim_data")
fi

# Evaluate buckets against thresholds.
# A bucket becomes a finding when its count reaches the threshold of its
# source and the (source, IP) pair is not in cooldown. The cooldown stamp is
# only written on real runs, so --dry-run leaves alert state untouched.
total_emitted=0
for ((i = 0; i < ${#BUCKET_IPS[@]}; i++)); do
  src="${BUCKET_SOURCES[$i]}"
  ip="${BUCKET_IPS[$i]}"
  count="${BUCKET_COUNTS[$i]}"
  users="${BUCKET_USERS[$i]}"

  if [[ "$src" == "ssh" ]]; then
    threshold="$THRESHOLD_SSH"
  elif [[ "$src" == "whm" || "$src" == "cpanel" ]]; then
    threshold="$THRESHOLD_CPANEL"
  elif [[ "$src" == "exim" ]]; then
    threshold="$THRESHOLD_EXIM"
  else
    threshold=999999
  fi

  (( count >= threshold )) || continue

  # State-file-safe key: every non-alphanumeric character of the IP becomes "_".
  cooldown_key="${src}.${ip//[^a-zA-Z0-9]/_}"
  if in_cooldown "$cooldown_key"; then
    log "INFO" "$src ip=$ip count=$count — in cooldown, suppressing"
    continue
  fi

  emit_finding "$src" "$ip" "$count" "$users" "$threshold"
  if [[ "$DRY_RUN" -eq 0 ]]; then
    mark_cooldown "$cooldown_key"
  fi
  total_emitted=$(( total_emitted + 1 ))
done

# One digest email for the whole run (the function itself decides whether
# anything is actually sent).
send_digest_email

# ${#BUCKET_IPS[@]} counts buckets, i.e. distinct (source, IP) pairs seen
# this run, whether or not they reached a threshold.
log "INFO" "$SCRIPT_NAME complete: ${#BUCKET_IPS[@]} unique IPs across all sources, ${total_emitted} new finding(s) emitted"
exit 0
