#!/bin/bash
#
# cwp-rbl-check — daily IP reputation check against major DNSBLs
#
# Part of CloudWatch Pro (CWP) v1. Runs daily on each cPanel server.
# Resolves each of the server's primary IPs against 5 major DNS-based
# block lists. Each new listing emits a finding (P1 or P2, per blocklist)
# and triggers an immediate email — your early warning that mail deliverability
# is about to collapse (Gmail/Outlook start junking before users notice).
#
# Detect-only. Does NOT delist, modify mail config, or take action.
# Operator follows the per-blocklist delist link in the alert email.
#
# DNSBLs queried (all free for low-volume use):
#   - Spamhaus ZEN     (zen.spamhaus.org)        combines SBL + CSS + XBL + PBL
#   - SpamCop          (bl.spamcop.net)
#   - Barracuda        (b.barracudacentral.org)
#   - SORBS            (dnsbl.sorbs.net)
#   - Mailspike        (z.mailspike.net)
#
# Per-IP, per-blocklist state remembers the last response seen, so we only
# re-alert when a listing is new or its response code changes — not daily for
# the same known-listed IP. A removed listing just clears the saved state.
#
# INSTALL:
#   sudo install -d /opt/cwp/agent/modules/rbl-check \
#                   /etc/cwp \
#                   /var/cwp/state/rbl-check \
#                   /var/cwp/findings \
#                   /var/log/cwp
#   sudo install -m 0755 cwp-rbl-check /opt/cwp/agent/modules/rbl-check/
#   sudo install -m 0644 config.example.conf /etc/cwp/rbl-check.conf
#   sudo $EDITOR /etc/cwp/rbl-check.conf   # set ALERT_EMAIL, optionally SERVER_IPS
#   sudo crontab -l 2>/dev/null | { cat; cat cron.example; } | sudo crontab -
#
# USAGE:
#   cwp-rbl-check                    # normal scan
#   cwp-rbl-check --dry-run          # report only, no findings/email/state update
#   cwp-rbl-check --verbose          # log to stderr too
#   cwp-rbl-check --ip 1.2.3.4       # check a single IP (testing)
#   cwp-rbl-check --no-email         # write findings, skip email
#   cwp-rbl-check --version
#
# EXIT CODES:
#   0 — scan completed
#   1 — config or environment error
#   2 — invalid arguments

# Strict mode: abort on unhandled command failures (-e), on expansion of unset
# variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Reported by --version and in the start/complete log lines.
VERSION="0.1.0"
SCRIPT_NAME="cwp-rbl-check"

# ---- defaults ----
# All of these are plain assignments, so /etc/cwp/common.conf and $CONFIG_FILE
# (both sourced later by load_config) can override any of them.

# Per-module config file; the CWP_RBL_CHECK_CONFIG environment variable wins.
CONFIG_FILE="${CWP_RBL_CHECK_CONFIG:-/etc/cwp/rbl-check.conf}"
# Holds one "seen.<ip>.<blocklist>" file per listing that has been alerted on.
STATE_DIR="/var/cwp/state/rbl-check"
# findings.jsonl in this directory receives one JSON line per finding.
FINDINGS_DIR="/var/cwp/findings"
LOG_FILE="/var/log/cwp/rbl-check.log"
# Recipient of the alert email (used as the To: header).
ALERT_EMAIL="root@localhost"
# Fully-qualified hostname when available, otherwise the short hostname.
SERVER_NAME="$(hostname -f 2>/dev/null || hostname)"
# Mail submission binary; invoked with "-t -i" and the message on stdin.
SENDMAIL_BIN="/usr/sbin/sendmail"

# Space-separated IPs to check. Empty = auto-detect from cPanel.
SERVER_IPS=""

# DNS query timeout (seconds) per blocklist; each lookup is tried once.
DNS_TIMEOUT=3

# Blocklists: "name|domain|severity|delist_url_template"
# Severity: P1 = mail deliverability impact, P2 = informational
# In the template, %s is replaced with the checked IP by emit_finding.
# NOTE(review): SORBS was reportedly decommissioned in mid-2024 — confirm that
# dnsbl.sorbs.net still serves data before relying on that entry.
RBL_LIST=(
  "Spamhaus ZEN|zen.spamhaus.org|P1|https://check.spamhaus.org/listed?searchterm=%s"
  "SpamCop|bl.spamcop.net|P1|https://www.spamcop.net/bl.shtml?%s"
  "Barracuda|b.barracudacentral.org|P1|https://www.barracudacentral.org/lookups?ip=%s"
  "SORBS|dnsbl.sorbs.net|P2|http://www.sorbs.net/lookup.shtml?%s"
  "Mailspike|z.mailspike.net|P2|https://mailspike.net/iprep.html?ip=%s"
)

# Command-line switches (0 = off, 1 = on), set by the argument parser below.
# load_config runs after argument parsing, so a config file that assigns any
# of these would override the command line.
DRY_RUN=0
VERBOSE=0
NO_EMAIL=0
# IP given with --ip; when non-empty it replaces SERVER_IPS and auto-detection.
SINGLE_IP=""

# ---- helpers ----

# log <level> <message...>
# Appends "<timestamp> [<level>] <message>" to $LOG_FILE (best effort) and
# mirrors the same line to stderr when VERBOSE is 1 or the level is ERROR.
# Never fails because of the log file: an unwritable or missing log is ignored.
log() {
  local level="$1"; shift
  local ts line
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  printf -v line '%s [%s] %s' "$ts" "$level" "$*"
  # The braces matter: redirections are applied left to right, so a plain
  # `printf ... >> "$LOG_FILE" 2>/dev/null` still prints the shell's
  # "No such file or directory" when the log cannot be opened (e.g. when
  # logging before the log directory has been created). Redirecting stderr
  # for the whole group silences that too.
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
  if [[ "$VERBOSE" -eq 1 || "$level" == "ERROR" ]]; then
    printf '%s\n' "$line" >&2
  fi
}

# die <message...>
# Records the message at ERROR level, then terminates the script with status 1.
die() {
  log "ERROR" "$*"
  exit 1
}

# usage [exit_code]
# Prints this script's leading comment block (description, install, usage and
# exit codes) with the "# " prefix removed, then exits with exit_code
# (default 0). Help requested explicitly goes to stdout; help shown because of
# a bad invocation (non-zero exit_code) goes to stderr.
usage() {
  local rc="${1:-0}" text
  # Follow the header to its natural end (first non-comment line) instead of
  # a fixed line count, which silently truncates the help whenever the header
  # grows. The shebang is skipped so it is not printed as "!/bin/bash".
  text="$(awk '
    NR == 1 && /^#!/ { next }
    /^#/             { sub(/^# ?/, ""); print; next }
                     { exit }
  ' "$0" 2>/dev/null || true)"
  if [[ "$rc" -eq 0 ]]; then
    printf '%s\n' "$text"
  else
    printf '%s\n' "$text" >&2
  fi
  exit "$rc"
}

# load_config
# Sources the shared CWP settings (/etc/cwp/common.conf) when readable, then
# the module's own $CONFIG_FILE so module values win. A missing module config
# is only a warning: the built-in defaults stay in effect. Always returns 0.
load_config() {
  # Shared CWP defaults (ALERT_EMAIL, SERVER_NAME, etc.) come first.
  if [[ -r /etc/cwp/common.conf ]]; then
    # shellcheck source=/dev/null
    source /etc/cwp/common.conf
  fi

  if [[ ! -r "$CONFIG_FILE" ]]; then
    log "WARN" "no config at $CONFIG_FILE — using built-in defaults"
    return 0
  fi

  # shellcheck source=/dev/null
  source "$CONFIG_FILE"
  log "INFO" "loaded config from $CONFIG_FILE"
  return 0
}

# ensure_dirs
# Creates the state directory, the findings directory and the directory that
# holds the log file when they do not exist yet. Dies (exit 1) if any of them
# cannot be created.
ensure_dirs() {
  # Keep the loop variable local so it does not leak into (or clobber a
  # variable of) the global scope that the sourced config files share.
  local d
  for d in "$STATE_DIR" "$FINDINGS_DIR" "$(dirname "$LOG_FILE")"; do
    if [[ -d "$d" ]]; then
      continue
    fi
    mkdir -p "$d" 2>/dev/null || die "cannot create $d (run as root or pre-create)"
  done
}

# preflight
# Verifies the external tools the scan relies on: `dig` is mandatory (dies
# when absent); a missing sendmail binary only produces a warning, and only
# when email has not been disabled with --no-email.
preflight() {
  if ! command -v dig >/dev/null; then
    die "dig not found (install bind-utils)"
  fi

  # Nothing more to check when the operator asked for no email.
  [[ "$NO_EMAIL" -eq 0 ]] || return 0

  if [[ -x "$SENDMAIL_BIN" ]]; then
    return 0
  fi
  log "WARN" "$SENDMAIL_BIN not found — email will be skipped"
}

# json_escape <string>
# Prints <string> escaped for use inside a JSON string literal (without the
# surrounding quotes): backslash and double quote are backslash-escaped,
# newline/tab/carriage return use their short forms, and every other control
# character below U+0020 becomes \u00XX, as JSON requires.
json_escape() {
  local s="$1"
  # Backslash first, so the backslashes added below are not doubled again.
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\t'/\\t}"
  s="${s//$'\r'/\\r}"
  # Remaining control characters are rare; only pay for the loop when one is
  # actually present.
  if [[ "$s" == *[[:cntrl:]]* ]]; then
    local code hex ch
    for code in {1..31}; do
      printf -v hex '%02x' "$code"
      printf -v ch "\\x${hex}"
      s="${s//"$ch"/\\u00${hex}}"
    done
  fi
  printf '%s' "$s"
}

# discover_ips — auto-detect server IPs from cPanel files
# Collects candidates from /var/cpanel/mainip and the first field of /etc/ips
# (format <ip>:<netmask>:<broadcast>); if neither yields anything, falls back
# to `hostname -I`. Prints the unique, public-looking IPv4 addresses separated
# by spaces (nothing when there are none) and always returns 0.
discover_ips() {
  local candidates="" extra=""
  if [[ -r /var/cpanel/mainip ]]; then
    candidates="$(cat /var/cpanel/mainip 2>/dev/null || true)"
  fi
  if [[ -r /etc/ips ]]; then
    extra="$(awk -F: '{print $1}' /etc/ips 2>/dev/null | tr '\n' ' ' || true)"
    candidates="$candidates $extra"
  fi
  # Fall back to hostname -I if the cPanel files produced nothing
  if [[ -z "${candidates//[[:space:]]/}" ]]; then
    candidates="$(hostname -I 2>/dev/null || true)"
  fi
  # One awk does both the syntax check and the range filter. A grep stage here
  # would exit 1 when nothing matches, and under `set -o pipefail` that status
  # would become the function's status and abort a caller running with
  # errexit before it can report "no IPs found".
  printf '%s\n' "$candidates" | tr -s '[:space:]' '\n' | \
    awk -F. '
      !/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { next }              # not dotted-quad
      $1 > 255 || $2 > 255 || $3 > 255 || $4 > 255 { next }     # octet out of range
      $1 == 0 || $1 == 10 || $1 == 127 || $1 >= 224 { next }    # this-net, private, loopback, multicast/reserved
      $1 == 100 && $2 >= 64 && $2 <= 127 { next }               # carrier-grade NAT
      $1 == 169 && $2 == 254 { next }                           # link-local
      $1 == 172 && $2 >= 16 && $2 <= 31 { next }                # private
      $1 == 192 && $2 == 168 { next }                           # private
      { print }
    ' | sort -u | tr '\n' ' '
}

# reverse_ip <ip>
# Prints the dotted quad with its octets in reverse order, the form used as
# the left-hand part of a DNSBL query name (1.2.3.4 -> 4.3.2.1).
reverse_ip() {
  local addr="$1"
  printf '%s\n' "$addr" | awk -F. '{ printf "%s.%s.%s.%s\n", $4, $3, $2, $1 }'
}

# query_dnsbl <ip> <bl_domain>
# Looks up <reversed-ip>.<bl_domain> and prints the listing code(s) the
# blocklist returned, or nothing when the IP is not listed. A single answer is
# printed as-is (e.g. 127.0.0.2); several answers are printed sorted and
# comma-joined (e.g. 127.0.0.2,127.0.0.4) so the value is stable from run to
# run regardless of the order the resolver returns records in.
# NXDOMAIN, timeouts and any non-127.x.x.x answer print nothing.
# Always returns 0.
query_dnsbl() {
  local ip="$1" bl="$2"
  local rev answers line
  local -a codes=()
  rev="$(reverse_ip "$ip")"
  # +short returns only the answer; +time/+tries cap the wait
  answers="$(dig +short +time="$DNS_TIMEOUT" +tries=1 "${rev}.${bl}" A 2>/dev/null || true)"
  while IFS= read -r line; do
    # Listing codes live in 127.0.0.0/8; this also drops dig's
    # ";; connection timed out" text, which +short prints on stdout.
    [[ "$line" =~ ^127\.[0-9]+\.[0-9]+\.[0-9]+$ ]] || continue
    # 127.255.255.x is how Spamhaus signals "query refused" (public/open
    # resolver, rate limit, malformed query). It says nothing about the IP,
    # so it must not be reported as a listing.
    if [[ "$line" == 127.255.255.* ]]; then
      log "WARN" "  ${bl}: answered ${line} for ${ip} — query refused, not a listing (check which DNS resolver this server uses)"
      continue
    fi
    codes+=("$line")
  done <<< "$answers"
  if (( ${#codes[@]} == 0 )); then
    return 0
  fi
  printf '%s\n' "${codes[@]}" | LC_ALL=C sort -u | paste -sd, -
}

# state_key <ip> <bl_name>
# Prints the key that identifies one IP/blocklist pair in the state directory:
# the IP with dots turned into underscores, a dot, then the blocklist name
# with spaces turned into underscores (no trailing newline).
state_key() {
  local ip_part="${1//./_}"
  local bl_part="${2// /_}"
  printf '%s.%s' "$ip_part" "$bl_part"
}

# read_state <key>
# Prints the response previously stored for <key>, or an empty line when no
# state file exists (or it cannot be read). Always returns 0.
read_state() {
  local path="$STATE_DIR/seen.$1"
  if [[ -f "$path" ]] && cat "$path"; then
    return 0
  fi
  echo ""
}

# write_state <key> <value>
# Stores <value> (without a trailing newline) as the last-seen response for
# <key>, replacing whatever was stored before.
write_state() {
  local target="$STATE_DIR/seen.$1"
  local payload="$2"
  printf '%s' "$payload" > "$target"
}

# clear_state <key>
# Forgets the stored response for <key>. Removing a key that has no state
# file is not an error; rm's diagnostics are discarded.
clear_state() {
  local stale="$STATE_DIR/seen.${1}"
  rm -f "$stale" 2>/dev/null
}

# emit_finding <ip> <bl_name> <bl_domain> <severity> <response> <delist_url>
# Builds one JSON finding for a listing and appends it to
# $FINDINGS_DIR/findings.jsonl (or prints it with a "DRY-RUN finding:" prefix
# when DRY_RUN is 1). Also appends a human-readable summary line to the global
# DIGEST_LINES array, which send_alert_email turns into the alert body.
# <delist_url> is a template in which %s stands for the IP.
emit_finding() {
  local ip="$1" bl_name="$2" bl_domain="$3" sev="$4" response="$5" delist_url="$6"
  local now_iso now_epoch finding_file id action delist_link json

  now_epoch="$(date +%s)"
  now_iso="$(date '+%Y-%m-%dT%H:%M:%S%z')"
  # Stable per server/IP/blocklist, so repeated findings share an id.
  id="rbl-check-${SERVER_NAME}-${ip//./_}-${bl_name// /_}"
  finding_file="$FINDINGS_DIR/findings.jsonl"

  # Plain substitution rather than using the template as a printf format:
  # a URL containing any other % sequence (e.g. %2F) would break printf.
  delist_link="${delist_url//[%]s/$ip}"
  action="Server IP ${ip} is listed on ${bl_name} (response: ${response}). Visit the delist page: ${delist_link}. Investigate root cause first (likely outbound spam from a compromised account — see mail-anomaly + phishing-sweep findings) before requesting delist, or you will be re-listed within hours."

  # Every string field goes through json_escape — several come from config
  # (server name, blocklist table), and one stray quote or backslash would
  # corrupt the whole JSONL line.
  json="$(printf '{"ts":"%s","ts_epoch":%d,"module":"%s","server":"%s","severity":"%s","src_ip":"%s","metric":"%s","value":1,"blocklist":"%s","blocklist_domain":"%s","dns_response":"%s","delist_url":"%s","id":"%s","recommended_action":"%s"}' \
    "$now_iso" "$now_epoch" "rbl-check" \
    "$(json_escape "$SERVER_NAME")" "$(json_escape "$sev")" "$(json_escape "$ip")" \
    "ip_on_dnsbl" \
    "$(json_escape "$bl_name")" "$(json_escape "$bl_domain")" \
    "$(json_escape "$response")" "$(json_escape "$delist_link")" \
    "$(json_escape "$id")" "$(json_escape "$action")")"

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN finding: %s\n' "$json"
  else
    printf '%s\n' "$json" >> "$finding_file"
  fi

  DIGEST_LINES+=("[$sev] ${ip}  listed on ${bl_name}  (response=${response})  delist: ${delist_link}")
}

# send_alert_email
# Sends one alert email summarising every line collected in DIGEST_LINES.
# Does nothing when there are no lines or when NO_EMAIL is 1. With DRY_RUN=1
# the message is printed to stdout instead of being sent. The subject tag and
# the X-CWP-Severity header carry the most severe level present in the digest
# (P1 before P2), so a P2-only digest is not announced as P1.
# Returns 1 when the sendmail binary reports a failure, 0 otherwise.
send_alert_email() {
  local count="${#DIGEST_LINES[@]}"
  if (( count == 0 )); then
    log "INFO" "no listings — email NOT sent"
    return
  fi

  if [[ "$NO_EMAIL" -eq 1 ]]; then
    log "INFO" "$count listing(s), --no-email set — email skipped"
    return
  fi

  # Each digest line starts with "[<severity>]"; keep the lowest-sorting tag
  # (P1 sorts before P2). Falls back to P1 if no line carries a tag.
  local top_sev="" line tag
  local tag_re='^\[([^]]+)\]'
  for line in "${DIGEST_LINES[@]}"; do
    if [[ "$line" =~ $tag_re ]]; then
      tag="${BASH_REMATCH[1]}"
      if [[ -z "$top_sev" || "$tag" < "$top_sev" ]]; then
        top_sev="$tag"
      fi
    fi
  done
  top_sev="${top_sev:-P1}"

  local subject body now_str findings_block
  now_str="$(date '+%Y-%m-%d %H:%M:%S %Z')"
  subject="[CWP ${top_sev}] rbl-check: ${count} blocklist listing(s) on ${SERVER_NAME}"
  findings_block="$(printf '%s\n' "${DIGEST_LINES[@]}")"

  # read -d '' slurps the whole here-document; it returns non-zero at EOF,
  # hence the `|| true`.
  IFS='' read -r -d '' body <<EOF || true
CloudWatch Pro — IP Reputation (DNSBL) Check

Server:    ${SERVER_NAME}
Run time:  ${now_str}
Listings:  ${count}

----- listings -----

${findings_block}

----- what to do -----

A blocklisted IP means mail from this server is being rejected or junked
by major mail providers (Gmail, Outlook, Yahoo). Act fast.

  1. Find the root cause FIRST. Do not request delisting until clean,
     or you will be re-listed within hours.

       Check recent mail-anomaly findings:
           grep -E 'mail-anomaly' ${FINDINGS_DIR}/findings.jsonl | tail -20

       Check recent phishing-sweep findings:
           grep -E 'phishing-sweep' ${FINDINGS_DIR}/findings.jsonl | tail -20

       Inspect Exim queue:
           exim -bp | head -50

  2. Stop the abuse:
       - Suspend mail for any compromised account: WHM > Suspend Mail
       - Clean up webshells / mailer scripts in /home/<account>/public_html
       - Force WP admin password reset

  3. Wait 1-4 hours for queue to drain and abuse signals to age out.

  4. Request delisting via each blocklist's link in the findings above.
     Most automated delists take 1-12 hours.

  5. Monitor: this script runs daily. Re-listings will alert again only
     if the response code changes (so a stable listing alerts once, not
     every day).

This module is detect-only. CWP did NOT modify mail config or contact any blocklist.

Findings file: ${FINDINGS_DIR}/findings.jsonl
Sweep log:     ${LOG_FILE}
EOF

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf 'DRY-RUN email to %s:\n  Subject: %s\n%s\n' "$ALERT_EMAIL" "$subject" "$body"
    return
  fi

  if [[ ! -x "$SENDMAIL_BIN" ]]; then
    log "WARN" "sendmail not available; alert email NOT sent"
    return
  fi

  # Check the delivery explicitly: left to errexit, a sendmail failure would
  # end the script without a single line in the log.
  if ! {
    printf 'To: %s\n' "$ALERT_EMAIL"
    printf 'From: cwp-agent@%s\n' "$SERVER_NAME"
    printf 'Subject: %s\n' "$subject"
    printf 'X-CWP-Module: rbl-check\n'
    printf 'X-CWP-Severity: %s\n' "$top_sev"
    printf 'Content-Type: text/plain; charset=utf-8\n'
    printf '\n%s\n' "$body"
  } | "$SENDMAIL_BIN" -t -i; then
    log "ERROR" "$SENDMAIL_BIN failed; alert email to $ALERT_EMAIL NOT sent ($count listing(s))"
    return 1
  fi

  log "INFO" "alert email sent to $ALERT_EMAIL with $count listing(s)"
}

# ---- argument parsing ----
# Flags may be given in any order. Anything unrecognised, and a missing or
# malformed --ip value, is an argument error: message on stderr, exit status 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)  DRY_RUN=1; shift ;;
    --verbose)  VERBOSE=1; shift ;;
    --no-email) NO_EMAIL=1; shift ;;
    --ip)
      # Check for the value explicitly: under `set -u` a trailing bare --ip
      # would otherwise die on "$2" with "unbound variable" and exit 1.
      if [[ $# -lt 2 ]]; then
        printf '%s: --ip requires an IPv4 address\n' "$SCRIPT_NAME" >&2
        exit 2
      fi
      # Only dotted-quad IPv4 can be turned into a DNSBL query name, and the
      # value also ends up in state file names — reject anything else here.
      if [[ ! "$2" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
        printf '%s: invalid IPv4 address for --ip: %s\n' "$SCRIPT_NAME" "$2" >&2
        exit 2
      fi
      SINGLE_IP="$2"
      shift 2
      ;;
    --version)  printf '%s %s\n' "$SCRIPT_NAME" "$VERSION"; exit 0 ;;
    -h|--help)  usage 0 ;;
    *)          printf 'unknown argument: %s\n' "$1" >&2; usage 2 ;;
  esac
done

# ---- main ----
# Flow: load config, make sure directories and tools exist, work out which IPs
# to check, query every blocklist for every IP, record findings and state for
# new or changed listings, then send a single digest email.
load_config
ensure_dirs
preflight

# Human-readable summary lines for the alert email; emit_finding appends here.
DIGEST_LINES=()

# Determine list of IPs to check: --ip wins, then SERVER_IPS, then auto-detect
if [[ -n "$SINGLE_IP" ]]; then
  IPS_TO_CHECK="$SINGLE_IP"
elif [[ -n "$SERVER_IPS" ]]; then
  IPS_TO_CHECK="$SERVER_IPS"
else
  # Tolerate a non-zero status from discover_ips so that an empty result
  # reaches the explanatory die below instead of tripping errexit silently.
  IPS_TO_CHECK="$(discover_ips || true)"
fi

# Collapse all whitespace (spaces, tabs, newlines) to single spaces, trim, and
# verify we have at least one IP
IPS_TO_CHECK="$(printf '%s\n' "$IPS_TO_CHECK" | tr -s '[:space:]' ' ' | sed 's/^ //;s/ $//')"
if [[ -z "$IPS_TO_CHECK" ]]; then
  die "no IPs to check (auto-detect found none; set SERVER_IPS in config or use --ip)"
fi

log "INFO" "$SCRIPT_NAME v$VERSION starting (server=$SERVER_NAME, dry_run=$DRY_RUN, ips=$IPS_TO_CHECK)"

total_listings=0
total_clean=0
total_changed=0

# Split into an array instead of relying on unquoted expansion, so a stray
# glob character in a configured value is never expanded against the cwd.
read -r -a ip_list <<< "$IPS_TO_CHECK"

for ip in "${ip_list[@]}"; do
  # Only dotted-quad IPv4 can be reversed into a DNSBL query name.
  if [[ ! "$ip" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
    log "WARN" "skipping '$ip': not an IPv4 address"
    continue
  fi

  log "INFO" "checking $ip against ${#RBL_LIST[@]} blocklists"
  for bl_entry in "${RBL_LIST[@]}"; do
    IFS='|' read -r bl_name bl_domain bl_sev bl_url <<< "$bl_entry"
    response="$(query_dnsbl "$ip" "$bl_domain" || true)"
    key="$(state_key "$ip" "$bl_name")"
    last_seen="$(read_state "$key")"

    if [[ -n "$response" ]]; then
      # Currently listed
      total_listings=$(( total_listings + 1 ))
      if [[ "$response" != "$last_seen" ]]; then
        # New listing OR response changed → emit finding
        emit_finding "$ip" "$bl_name" "$bl_domain" "$bl_sev" "$response" "$bl_url"
        if [[ "$DRY_RUN" -eq 0 ]]; then
          write_state "$key" "$response"
        fi
        total_changed=$(( total_changed + 1 ))
      else
        log "INFO" "  $bl_name: still listed as $response (no change, no re-alert)"
      fi
    else
      # Not listed — clear stale state if any.
      # NOTE(review): an empty response also covers failed lookups (DNS
      # timeout, resolver error), which query_dnsbl does not distinguish from
      # "clean"; a transient failure therefore clears state and the next
      # successful run alerts again for a listing that never went away.
      total_clean=$(( total_clean + 1 ))
      if [[ -n "$last_seen" ]]; then
        log "INFO" "  $bl_name: was listed as $last_seen, now clean — clearing state"
        if [[ "$DRY_RUN" -eq 0 ]]; then
          clear_state "$key"
        fi
      fi
    fi
  done
done

send_alert_email

log "INFO" "$SCRIPT_NAME complete: listings=$total_listings clean=$total_clean new_or_changed=$total_changed"
exit 0
