fix: Add in-memory cache for lower-confidence phishing checks

2026-04-22 07:40:23 +02:00
parent b7eb2549d4
commit 208c809a90
4 changed files with 181 additions and 74 deletions
@@ -158,6 +158,9 @@ Required for builds and deployment (see turbo.json and .env.example):
  - `OPENROUTER_API_KEY` - API key for OpenRouter (enables phishing detection)
  - `OPENROUTER_MODEL` (default: anthropic/claude-3-haiku) - LLM model to use for content analysis
  - `PHISHING_DETECTION_SAMPLE_RATE` (default: 0.1) - Percentage of emails to check (0.0-1.0, e.g., 0.1 = 10%)
+  - `PHISHING_CONFIDENCE_THRESHOLD` (default: 85) - Minimum confidence percentage (0-100) at which a single detection auto-disables the project
+  - `PHISHING_CUMULATIVE_THRESHOLD` (default: 3) - Number of phishing detections within the time window that triggers auto-disable
+  - `PHISHING_CUMULATIVE_WINDOW_MS` (default: 3600000) - Time window in milliseconds for cumulative tracking (3600000 ms = 1 hour)

 **Important Notes:**

@@ -122,3 +122,6 @@ export const OPENROUTER_API_KEY = validateEnv('OPENROUTER_API_KEY', '');
 export const OPENROUTER_MODEL = validateEnv('OPENROUTER_MODEL', 'anthropic/claude-3-haiku');
 export const PHISHING_DETECTION_SAMPLE_RATE = Number(validateEnv('PHISHING_DETECTION_SAMPLE_RATE', '0.1')); // Default 10% of emails
 export const PHISHING_DETECTION_ENABLED = OPENROUTER_API_KEY !== '';
+export const PHISHING_CONFIDENCE_THRESHOLD = Number(validateEnv('PHISHING_CONFIDENCE_THRESHOLD', '85')); // Minimum confidence % at which a single detection disables the project (default 85). NOTE(review): NaN if the value is not numeric; confirm validateEnv guards this
+export const PHISHING_CUMULATIVE_THRESHOLD = Number(validateEnv('PHISHING_CUMULATIVE_THRESHOLD', '3')); // Number of phishing detections inside the window that triggers auto-disable (default 3)
+export const PHISHING_CUMULATIVE_WINDOW_MS = Number(validateEnv('PHISHING_CUMULATIVE_WINDOW_MS', '3600000')); // Length of the cumulative tracking window in ms (default 3600000 = 1 hour)
@@ -16,6 +16,9 @@ import {
  LANDING_URI,
  OPENROUTER_API_KEY,
  OPENROUTER_MODEL,
+  PHISHING_CONFIDENCE_THRESHOLD,
+  PHISHING_CUMULATIVE_THRESHOLD,
+  PHISHING_CUMULATIVE_WINDOW_MS,
  PHISHING_DETECTION_ENABLED,
  PHISHING_DETECTION_SAMPLE_RATE,
 } from '../app/constants.js';
@@ -74,6 +77,58 @@ const SECURITY_THRESHOLDS = {
  NEW_PROJECT_COMPLAINT_7DAY_CEILING_CRITICAL: 20,
 } as const;

+/**
+ * A single phishing detection recorded for a project.
+ *
+ * Values of this shape are JSON-serialized and stored as members of a
+ * per-project Redis sorted set whose score is the detection timestamp,
+ * which is what makes time-window filtering possible.
+ *
+ * - timestamp: Date.now() value (epoch milliseconds) taken when the detection was tracked.
+ * - confidence: confidence value supplied by the caller (logged as a percentage at the call site).
+ * - subject: subject string supplied by the caller (presumably the email subject line).
+ */
+interface PhishingDetection {
+  timestamp: number;
+  confidence: number;
+  subject: string;
+}
+
+/**
+ * Record a phishing detection for a project so that detections can be counted cumulatively.
+ *
+ * Performs three Redis commands, awaited one after another (not wrapped in a transaction):
+ *   1. ZADD the detection to the sorted set of the project, scored by the current time in ms.
+ *   2. ZREMRANGEBYSCORE to drop entries scored at or before (now - PHISHING_CUMULATIVE_WINDOW_MS).
+ *   3. EXPIRE to reset the TTL of the key to the window length, rounded up to whole seconds.
+ *
+ * The sorted-set member is the JSON of {timestamp, confidence, subject}. Sorted-set members
+ * are unique, so two detections with the same millisecond timestamp, confidence and subject
+ * are stored as a single entry.
+ *
+ * @param projectId - Project identifier; interpolated into the Redis key.
+ * @param confidence - Confidence value stored with the detection.
+ * @param subject - Subject string stored with the detection.
+ * @returns A promise that resolves after all three Redis commands have completed.
+ */
+async function trackPhishingDetection(projectId: string, confidence: number, subject: string): Promise<void> {
+  const now = Date.now();
+  const key = `phishing:detections:${projectId}`;
+
+  // Store the detection as a sorted set member (score = timestamp in ms)
+  // The member value is JSON carrying timestamp, confidence and subject
+  const detection: PhishingDetection = {timestamp: now, confidence, subject};
+  await redis.zadd(key, now, JSON.stringify(detection));
+
+  // Remove detections outside the time window (scores up to and including the cutoff)
+  const cutoff = now - PHISHING_CUMULATIVE_WINDOW_MS;
+  await redis.zremrangebyscore(key, '-inf', cutoff);
+
+  // Reset the TTL to the window duration (in seconds) so idle keys expire without further writes
+  await redis.expire(key, Math.ceil(PHISHING_CUMULATIVE_WINDOW_MS / 1000));
+}
+
+/**
+ * Count the phishing detections recorded for a project within the cumulative window.
+ *
+ * Issues a ZCOUNT on the sorted set of the project for scores from
+ * (now - PHISHING_CUMULATIVE_WINDOW_MS) to +inf. The lower bound is inclusive here,
+ * whereas the cleanup in trackPhishingDetection also removes an entry scored exactly
+ * at its cutoff, so the two differ by that single boundary value.
+ *
+ * @param projectId - Project identifier; interpolated into the Redis key.
+ * @returns The number of sorted-set entries whose score falls inside the window.
+ */
+async function getRecentPhishingCount(projectId: string): Promise<number> {
+  const now = Date.now();
+  const cutoff = now - PHISHING_CUMULATIVE_WINDOW_MS;
+  const key = `phishing:detections:${projectId}`;
+
+  // Count detections scored at or after the cutoff; this is a read-only query and prunes nothing
+  const count = await redis.zcount(key, cutoff, '+inf');
+  return count;
+}
+
+/**
+ * Delete the stored phishing detection history of a project.
+ *
+ * Removes the whole sorted-set key with DEL, so the cumulative count for the
+ * project starts again from zero. The phishing check calls this once it has
+ * decided that the project should be disabled.
+ *
+ * @param projectId - Project identifier; interpolated into the Redis key.
+ * @returns A promise that resolves after the DEL command has completed.
+ */
+async function clearPhishingHistory(projectId: string): Promise<void> {
+  const key = `phishing:detections:${projectId}`;
+  await redis.del(key);
+}
+
 interface RateData {
  total: number;
  bounces: number;
@@ -814,17 +869,42 @@ Set confidence to 100 only if you are absolutely certain it's phishing.`,
        signale.warn(
          `[PHISHING] Detected phishing content for project ${projectId} - Confidence: ${confidence}% - Reason: ${result.reason}`,
        );
+
+        // Track this detection for cumulative analysis
+        await trackPhishingDetection(projectId, confidence, subject);
+
+        // Get count of recent detections
+        const recentCount = await getRecentPhishingCount(projectId);
+        signale.info(
+          `[PHISHING] Project ${projectId} has ${recentCount} phishing detection(s) in the last ${PHISHING_CUMULATIVE_WINDOW_MS / 1000 / 60} minutes`,
+        );
      } else {
        signale.success(`[PHISHING] Passed phishing check for project: ${projectId}`);
      }

-      // Auto-disable project if 100% confidence
-      const shouldDisable = isPhishing && confidence === 100;
+      // Determine if project should be disabled
+      // Disable if EITHER:
+      // 1. Single detection with high confidence (>= threshold)
+      // 2. Multiple detections within time window (>= cumulative threshold)
+      const meetsConfidenceThreshold = isPhishing && confidence >= PHISHING_CONFIDENCE_THRESHOLD;
+      const recentCount = await getRecentPhishingCount(projectId);
+      const meetsCumulativeThreshold = isPhishing && recentCount >= PHISHING_CUMULATIVE_THRESHOLD;
+      const shouldDisable = meetsConfidenceThreshold || meetsCumulativeThreshold;

      if (shouldDisable) {
-        signale.error(
-          `[PHISHING] High confidence phishing detected (${confidence}%) - will disable project ${projectId}`,
-        );
+        if (meetsConfidenceThreshold) {
+          signale.error(
+            `[PHISHING] High confidence phishing detected (${confidence}% >= ${PHISHING_CONFIDENCE_THRESHOLD}%) - will disable project ${projectId}`,
+          );
+        }
+        if (meetsCumulativeThreshold) {
+          signale.error(
+            `[PHISHING] Cumulative threshold reached (${recentCount} >= ${PHISHING_CUMULATIVE_THRESHOLD} detections) - will disable project ${projectId}`,
+          );
+        }
+
+        // Clear history after disabling
+        await clearPhishingHistory(projectId);
      }

      return {
@@ -5,116 +5,137 @@ description: Configuration reference

 ## Security & Database

-| Variable | Required | Description | Example |
-|---|---|---|---|
-| `JWT_SECRET` | Yes | Secret key used to sign JWT tokens. Generate with `openssl rand -base64 32`. | `s3cr3t...` |
-| `DB_PASSWORD` | Yes | PostgreSQL database password. Used by the Docker Compose setup. | `changeme123` |
-| `DATABASE_URL` | Yes | Full PostgreSQL connection string. Auto-configured in Docker. | `postgresql://plunk:password@postgres:5432/plunk` |
-| `REDIS_URL` | Yes | Redis connection string. | `redis://redis:6379` |
-| `PORT` | No | Port the API server listens on. | `8080` (default) |
+| Variable       | Required | Description                                                                  | Example                                           |
+| -------------- | -------- | ---------------------------------------------------------------------------- | ------------------------------------------------- |
+| `JWT_SECRET`   | Yes      | Secret key used to sign JWT tokens. Generate with `openssl rand -base64 32`. | `s3cr3t...`                                       |
+| `DB_PASSWORD`  | Yes      | PostgreSQL database password. Used by the Docker Compose setup.              | `changeme123`                                     |
+| `DATABASE_URL` | Yes      | Full PostgreSQL connection string. Auto-configured in Docker.                | `postgresql://plunk:password@postgres:5432/plunk` |
+| `REDIS_URL`    | Yes      | Redis connection string.                                                     | `redis://redis:6379`                              |
+| `PORT`         | No       | Port the API server listens on.                                              | `8080` (default)                                  |

 ## URLs & Domains

 Set your subdomains here. The application automatically derives all internal and client-side URLs from these at container startup — you don't need to set `*_URI` or `NEXT_PUBLIC_*` variables manually.

-| Variable | Required | Description | Example |
-|---|---|---|---|
-| `API_DOMAIN` | Yes | Subdomain for the API server. | `api.yourdomain.com` |
-| `DASHBOARD_DOMAIN` | Yes | Subdomain for the dashboard app. | `app.yourdomain.com` |
-| `LANDING_DOMAIN` | Yes | Subdomain for the landing page. | `www.yourdomain.com` |
-| `WIKI_DOMAIN` | Yes | Subdomain for the documentation site. | `docs.yourdomain.com` |
-| `USE_HTTPS` | No | Set to `true` when running behind a TLS-terminating reverse proxy. Used to construct URLs with the correct protocol. | `false` (default) |
+| Variable           | Required | Description                                                                                                          | Example               |
+| ------------------ | -------- | -------------------------------------------------------------------------------------------------------------------- | --------------------- |
+| `API_DOMAIN`       | Yes      | Subdomain for the API server.                                                                                        | `api.yourdomain.com`  |
+| `DASHBOARD_DOMAIN` | Yes      | Subdomain for the dashboard app.                                                                                     | `app.yourdomain.com`  |
+| `LANDING_DOMAIN`   | Yes      | Subdomain for the landing page.                                                                                      | `www.yourdomain.com`  |
+| `WIKI_DOMAIN`      | Yes      | Subdomain for the documentation site.                                                                                | `docs.yourdomain.com` |
+| `USE_HTTPS`        | No       | Set to `true` when running behind a TLS-terminating reverse proxy. Used to construct URLs with the correct protocol. | `false` (default)     |

 ## AWS SES

-| Variable | Required | Description | Example |
-|---|---|---|---|
-| `AWS_SES_REGION` | Yes | AWS region where SES is configured. | `us-east-1` |
-| `AWS_SES_ACCESS_KEY_ID` | Yes | AWS access key ID with SES send permissions. | `AKIA...` |
-| `AWS_SES_SECRET_ACCESS_KEY` | Yes | AWS secret access key for SES. | `wJalr...` |
-| `SES_CONFIGURATION_SET` | No | SES configuration set name used for open/click tracking. | `plunk-configuration-set` (default) |
-| `SES_CONFIGURATION_SET_NO_TRACKING` | No | A second SES configuration set without tracking. When set, projects can toggle email tracking on/off. If omitted, the tracking toggle is hidden. | `plunk-no-tracking-configuration-set` (default) |
+| Variable                            | Required | Description                                                                                                                                      | Example                                         |
+| ----------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------- |
+| `AWS_SES_REGION`                    | Yes      | AWS region where SES is configured.                                                                                                              | `us-east-1`                                     |
+| `AWS_SES_ACCESS_KEY_ID`             | Yes      | AWS access key ID with SES send permissions.                                                                                                     | `AKIA...`                                       |
+| `AWS_SES_SECRET_ACCESS_KEY`         | Yes      | AWS secret access key for SES.                                                                                                                   | `wJalr...`                                      |
+| `SES_CONFIGURATION_SET`             | No       | SES configuration set name used for open/click tracking.                                                                                         | `plunk-configuration-set` (default)             |
+| `SES_CONFIGURATION_SET_NO_TRACKING` | No       | A second SES configuration set without tracking. When set, projects can toggle email tracking on/off. If omitted, the tracking toggle is hidden. | `plunk-no-tracking-configuration-set` (default) |

 ## Storage (Minio)

 The bundled Docker setup includes Minio with defaults that work out of the box. Only change these when connecting to an external S3-compatible bucket.

-| Variable | Required | Description | Default |
-|---|---|---|---|
-| `MINIO_ROOT_USER` | No | Minio root username (Docker Compose only). | `plunk` |
-| `MINIO_ROOT_PASSWORD` | No | Minio root password (Docker Compose only). | `plunkminiopass` |
-| `MINIO_API_PORT` | No | Port for the Minio API (Docker Compose only). | `9000` |
-| `MINIO_CONSOLE_PORT` | No | Port for the Minio console UI (Docker Compose only). | `9001` |
-| `S3_ENDPOINT` | No | S3 or Minio endpoint URL. | `http://minio:9000` |
-| `S3_ACCESS_KEY_ID` | No | S3 or Minio access key. | — |
-| `S3_ACCESS_KEY_SECRET` | No | S3 or Minio secret key. | — |
-| `S3_BUCKET` | No | Bucket name for file uploads. | `uploads` |
-| `S3_PUBLIC_URL` | No | Publicly accessible base URL for stored files. | — |
-| `S3_FORCE_PATH_STYLE` | No | Use path-style URLs instead of virtual-hosted. Required for Minio. | `true` |
+| Variable               | Required | Description                                                        | Default             |
+| ---------------------- | -------- | ------------------------------------------------------------------ | ------------------- |
+| `MINIO_ROOT_USER`      | No       | Minio root username (Docker Compose only).                         | `plunk`             |
+| `MINIO_ROOT_PASSWORD`  | No       | Minio root password (Docker Compose only).                         | `plunkminiopass`    |
+| `MINIO_API_PORT`       | No       | Port for the Minio API (Docker Compose only).                      | `9000`              |
+| `MINIO_CONSOLE_PORT`   | No       | Port for the Minio console UI (Docker Compose only).               | `9001`              |
+| `S3_ENDPOINT`          | No       | S3 or Minio endpoint URL.                                          | `http://minio:9000` |
+| `S3_ACCESS_KEY_ID`     | No       | S3 or Minio access key.                                            | —                   |
+| `S3_ACCESS_KEY_SECRET` | No       | S3 or Minio secret key.                                            | —                   |
+| `S3_BUCKET`            | No       | Bucket name for file uploads.                                      | `uploads`           |
+| `S3_PUBLIC_URL`        | No       | Publicly accessible base URL for stored files.                     | —                   |
+| `S3_FORCE_PATH_STYLE`  | No       | Use path-style URLs instead of virtual-hosted. Required for Minio. | `true`              |

 ## SMTP Server

 The optional SMTP relay lets you send emails through Plunk via the SMTP protocol.

-| Variable | Required | Description | Default |
-|---|---|---|---|
-| `SMTP_DOMAIN` | No | SMTP relay domain. Required when using Traefik's `acme.json` with multiple certificates so the correct cert can be selected. | `localhost` |
-| `SMTP_ENABLED` | No | Explicitly enable SMTP features in the UI. Automatically enabled when `SMTP_DOMAIN` is set to a non-localhost value in production. | `false` |
-| `PORT_SECURE` | No | SMTPS port (implicit TLS). | `465` |
-| `PORT_SUBMISSION` | No | SMTP submission port (STARTTLS). | `587` |
-| `MAX_RECIPIENTS` | No | Maximum number of recipients per email. | `5` |
+| Variable          | Required | Description                                                                                                                        | Default     |
+| ----------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------- | ----------- |
+| `SMTP_DOMAIN`     | No       | SMTP relay domain. Required when using Traefik's `acme.json` with multiple certificates so the correct cert can be selected.       | `localhost` |
+| `SMTP_ENABLED`    | No       | Explicitly enable SMTP features in the UI. Automatically enabled when `SMTP_DOMAIN` is set to a non-localhost value in production. | `false`     |
+| `PORT_SECURE`     | No       | SMTPS port (implicit TLS).                                                                                                         | `465`       |
+| `PORT_SUBMISSION` | No       | SMTP submission port (STARTTLS).                                                                                                   | `587`       |
+| `MAX_RECIPIENTS`  | No       | Maximum number of recipients per email.                                                                                            | `5`         |

 ## OAuth

 Enables social login. Register an OAuth app with each provider and add the credentials here.

-| Variable | Required | Description |
-|---|---|---|
-| `GITHUB_OAUTH_CLIENT` | No | GitHub OAuth app client ID. |
-| `GITHUB_OAUTH_SECRET` | No | GitHub OAuth app client secret. |
-| `GOOGLE_OAUTH_CLIENT` | No | Google OAuth app client ID. |
-| `GOOGLE_OAUTH_SECRET` | No | Google OAuth app client secret. |
+| Variable              | Required | Description                     |
+| --------------------- | -------- | ------------------------------- |
+| `GITHUB_OAUTH_CLIENT` | No       | GitHub OAuth app client ID.     |
+| `GITHUB_OAUTH_SECRET` | No       | GitHub OAuth app client secret. |
+| `GOOGLE_OAUTH_CLIENT` | No       | Google OAuth app client ID.     |
+| `GOOGLE_OAUTH_SECRET` | No       | Google OAuth app client secret. |

 ## Stripe

 Required if you want to enable billing features. All Stripe variables must be set together for billing to activate.

-| Variable | Required | Description |
-|---|---|---|
-| `STRIPE_SK` | No | Stripe secret key. |
-| `STRIPE_WEBHOOK_SECRET` | No | Stripe webhook signing secret for verifying events. |
-| `STRIPE_PRICE_ONBOARDING` | No | Stripe price ID for the one-time onboarding fee. |
-| `STRIPE_PRICE_EMAIL_USAGE` | No | Stripe price ID for metered pay-per-email usage. |
-| `STRIPE_METER_EVENT_NAME` | No | Stripe meter event name. | `emails` (default) |
+| Variable                   | Required | Description                                         | Default            |
+| -------------------------- | -------- | --------------------------------------------------- | ------------------ |
+| `STRIPE_SK`                | No       | Stripe secret key.                                  |                    |
+| `STRIPE_WEBHOOK_SECRET`    | No       | Stripe webhook signing secret for verifying events. |                    |
+| `STRIPE_PRICE_ONBOARDING`  | No       | Stripe price ID for the one-time onboarding fee.    |                    |
+| `STRIPE_PRICE_EMAIL_USAGE` | No       | Stripe price ID for metered pay-per-email usage.    |                    |
+| `STRIPE_METER_EVENT_NAME`  | No       | Stripe meter event name.                            | `emails` (default) |

 ## Platform Emails

 When configured, Plunk will send email notifications to users for critical events (e.g. project disabled, billing limits reached). Without these, only ntfy notifications are sent.

-| Variable | Required | Description | Example |
-|---|---|---|---|
-| `PLUNK_API_KEY` | No | API key for a Plunk instance to send transactional emails. | `pk_...` |
-| `PLUNK_FROM_ADDRESS` | No | From address used for platform notification emails. | `noreply@yourdomain.com` |
+| Variable             | Required | Description                                                | Example                  |
+| -------------------- | -------- | ---------------------------------------------------------- | ------------------------ |
+| `PLUNK_API_KEY`      | No       | API key for a Plunk instance to send transactional emails. | `pk_...`                 |
+| `PLUNK_FROM_ADDRESS` | No       | From address used for platform notification emails.        | `noreply@yourdomain.com` |

 ## Notifications (ntfy)

 Plunk bundles a self-hosted [ntfy](https://ntfy.sh) server for internal system notifications.

-| Variable | Required | Description | Default |
-|---|---|---|---|
-| `NTFY_PORT` | No | Port for the ntfy web UI (Docker Compose only). | `8080` |
-| `NTFY_URL` | No | ntfy topic URL. Change this to use an external ntfy.sh server or your own instance. | `http://ntfy/plunk-notifications` |
+| Variable    | Required | Description                                                                         | Default                           |
+| ----------- | -------- | ----------------------------------------------------------------------------------- | --------------------------------- |
+| `NTFY_PORT` | No       | Port for the ntfy web UI (Docker Compose only).                                     | `8080`                            |
+| `NTFY_URL`  | No       | ntfy topic URL. Change this to use an external ntfy.sh server or your own instance. | `http://ntfy/plunk-notifications` |

 ## User Management

-| Variable | Required | Description | Default |
-|---|---|---|---|
-| `DISABLE_SIGNUPS` | No | When `true`, the signup endpoint rejects new registrations. Useful for private instances. | `false` |
-| `VERIFY_EMAIL_ON_SIGNUP` | No | When `true`, validates emails on signup — checks for disposable domains, plus-addressing, domain existence, and MX records. | `false` |
+| Variable                 | Required | Description                                                                                                                 | Default |
+| ------------------------ | -------- | --------------------------------------------------------------------------------------------------------------------------- | ------- |
+| `DISABLE_SIGNUPS`        | No       | When `true`, the signup endpoint rejects new registrations. Useful for private instances.                                   | `false` |
+| `VERIFY_EMAIL_ON_SIGNUP` | No       | When `true`, validates emails on signup — checks for disposable domains, plus-addressing, domain existence, and MX records. | `false` |

 ## Security

-| Variable | Required | Description | Default |
-|---|---|---|---|
-| `AUTO_PROJECT_DISABLE` | No | When `true`, projects are automatically suspended when bounce or complaint rate thresholds are exceeded. Set to `false` to manage project status manually. | `true` |
-| `EMAIL_RATE_LIMIT_PER_SECOND` | No | Override the email sending rate limit. If not set, Plunk automatically fetches the quota from your AWS SES account. | — |
+| Variable                      | Required | Description                                                                                                                                                | Default |
+| ----------------------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
+| `AUTO_PROJECT_DISABLE`        | No       | When `true`, projects are automatically suspended when bounce or complaint rate thresholds are exceeded. Set to `false` to manage project status manually. | `true`  |
+| `EMAIL_RATE_LIMIT_PER_SECOND` | No       | Override the email sending rate limit. If not set, Plunk automatically fetches the quota from your AWS SES account.                                        | —       |
+
+## Phishing Detection
+
+Plunk can use AI to detect and block phishing emails before they're sent. Requires an [OpenRouter](https://openrouter.ai) API key.
+
+| Variable                         | Required | Description                                                                                       | Default                    |
+| -------------------------------- | -------- | ------------------------------------------------------------------------------------------------- | -------------------------- |
+| `OPENROUTER_API_KEY`             | No       | OpenRouter API key. When set, enables AI-powered phishing detection.                              | —                          |
+| `OPENROUTER_MODEL`               | No       | LLM model to use for content analysis. See [OpenRouter models](https://openrouter.ai/models).     | `anthropic/claude-3-haiku` |
+| `PHISHING_DETECTION_SAMPLE_RATE` | No       | Fraction of emails to check (0.0-1.0). For example, `0.1` means 10% of emails are analyzed.       | `0.1` (10%)                |
+| `PHISHING_CONFIDENCE_THRESHOLD`  | No       | Minimum confidence percentage (0-100) required to auto-disable a project from a single detection. | `85`                       |
+| `PHISHING_CUMULATIVE_THRESHOLD`  | No       | Number of phishing detections within the time window required to auto-disable a project.          | `3`                        |
+| `PHISHING_CUMULATIVE_WINDOW_MS`  | No       | Time window in milliseconds for cumulative phishing tracking.                                     | `3600000` (1 hour)         |
+
+**How it works:**
+
+- A random sample of emails (controlled by `PHISHING_DETECTION_SAMPLE_RATE`) is analyzed by the LLM for phishing content.
+- Projects are automatically disabled if **either**:
+  1. A single email is detected with confidence ≥ `PHISHING_CONFIDENCE_THRESHOLD`, **or**
+  2. `PHISHING_CUMULATIVE_THRESHOLD` or more emails are flagged within the `PHISHING_CUMULATIVE_WINDOW_MS` time window.
+- Detection history is stored in Redis and shared across all worker instances.