feat(tests): enhance test database setup and cleanup for improved isolation and performance

2026-05-22 21:01:03 +02:00
parent 71e2277643
commit 32dd7bba46
5 changed files with 137 additions and 104 deletions
@@ -65,6 +65,18 @@ jobs:
      - name: Install dependencies
        run: yarn install --frozen-lockfile

+      - name: Tune Postgres for ephemeral CI workload
+        env:
+          PGPASSWORD: postgres
+        run: |
+          # synchronous_commit=off is the biggest single I/O win. Losing the most recent
+          # commits on a crash is acceptable for a throwaway CI database.
+          # synchronous_commit is dynamic — applies on reload. max_connections would
+          # require a restart, so we leave it at the default of 100 and cap workers
+          # at 4 × connection_limit=20 = 80 to stay under that budget.
+          psql -h localhost -U postgres -d plunk_test -c "ALTER SYSTEM SET synchronous_commit = 'off';"
+          psql -h localhost -U postgres -d plunk_test -c "SELECT pg_reload_conf();"
+
      - name: Setup environment variables
        run: |
          cat > .env << EOF
@@ -1,6 +1,50 @@
 import {PrismaClient} from '@plunk/db';
 import {execSync} from 'child_process';

+// Snake-cased Postgres table names from the Prisma schema (see its @@map directives); cleanup() truncates them all.
+// Order doesn't matter — a single TRUNCATE ... CASCADE statement handles the FK dependencies between them.
+const TRUNCATE_TABLES = [
+  'events',
+  'workflow_step_executions',
+  'emails',
+  'workflow_executions',
+  'workflow_transitions',
+  'workflow_steps',
+  'workflows',
+  'campaigns',
+  'templates',
+  'segment_memberships',
+  'segments',
+  'contacts',
+  'domains',
+  'memberships',
+  'projects',
+  'users',
+];
+
+/**
+ * Ensures the database named `workerDbName` exists, creating it via a short-lived client on the admin `postgres` database.
+ * Postgres has no `CREATE DATABASE IF NOT EXISTS`, so pg_database is checked first; the client is always disconnected.
+ */
+async function ensureDatabaseExists(databaseUrl: string, workerDbName: string) {
+  const adminUrl = new URL(databaseUrl); // reuse host, port and credentials from the test URL
+  adminUrl.pathname = '/postgres'; // point at the `postgres` database instead of the worker DB
+  adminUrl.searchParams.delete('connection_limit'); // strip pool tuning params from the admin URL
+  adminUrl.searchParams.delete('pool_timeout');
+
+  const admin = new PrismaClient({datasources: {db: {url: adminUrl.toString()}}});
+  try {
+    const rows = await admin.$queryRawUnsafe<{exists: boolean}[]>(
+      `SELECT EXISTS(SELECT 1 FROM pg_database WHERE datname = '${workerDbName}') AS exists`, // NOTE(review): name is interpolated unescaped — must be a trusted value
+    );
+    if (!rows[0]?.exists) {
+      await admin.$executeRawUnsafe(`CREATE DATABASE "${workerDbName}"`); // identifier is double-quoted but embedded quotes are not escaped
+    }
+  } finally {
+    await admin.$disconnect(); // release the admin connection even if a query above threw
+  }
+}
+
 /**
 * Test database helper
 * Manages test database isolation and cleanup
@@ -9,17 +53,22 @@ class TestDatabase {
  private prisma: PrismaClient | null = null;

  async initialize() {
-    // Use test database URL if provided, otherwise use main database
+    // setup.ts has already rewritten DATABASE_URL to include the per-worker DB name
+    // (e.g. plunk_test_w1, plunk_test_w2). We create that DB if missing, migrate it, then open the
+    // long-lived test client. NOTE(review): TEST_DATABASE_URL wins below and setup.ts does not rewrite it per worker.
    const databaseUrl = process.env.TEST_DATABASE_URL || process.env.DATABASE_URL;
-
    if (!databaseUrl) {
      throw new Error('DATABASE_URL or TEST_DATABASE_URL must be set for testing');
    }

-    // Raise Prisma's connection pool above its default (num_cpus * 2 + 1, ~5 on CI).
-    // Bulk inserts in some tests (e.g. SecurityService) saturate the default pool
-    // and time out. Test Postgres has max_connections=100, so 20 is well under budget.
    const url = new URL(databaseUrl);
+    const workerDbName = url.pathname.replace(/^\//, '');
+    if (!workerDbName) {
+      throw new Error('DATABASE_URL must include a database name');
+    }
+
+    // Bump the pool above Prisma's default (~5 on CI). Test Postgres has max_connections=100;
+    // N workers hold up to N*20 connections, so the 4-worker cap uses 80 and leaves headroom for admin/migration connections.
    if (!url.searchParams.has('connection_limit')) {
      url.searchParams.set('connection_limit', '20');
    }
@@ -27,29 +76,33 @@ class TestDatabase {
      url.searchParams.set('pool_timeout', '20');
    }

-    this.prisma = new PrismaClient({
-      datasources: {
-        db: {
-          url: url.toString(),
-        },
-      },
-    });
+    await ensureDatabaseExists(databaseUrl, workerDbName);

-    // Connect to database
-    await this.prisma.$connect();
-
-    // Run migrations (only once per test suite)
+    // Run pending migrations against this worker's DB. `migrate deploy` is a no-op
+    // when up-to-date and avoids the drift prompts that `migrate dev` does.
    try {
-      execSync('yarn workspace @plunk/db migrate:dev', {
+      execSync('yarn workspace @plunk/db migrate:prod', {
        env: {
          ...process.env,
-          DATABASE_URL: databaseUrl,
+          DATABASE_URL: url.toString(),
+          DIRECT_DATABASE_URL: process.env.DIRECT_DATABASE_URL || url.toString(),
        },
-        stdio: 'ignore',
+        encoding: 'utf8',
+        stdio: ['ignore', 'pipe', 'pipe'],
      });
    } catch (error) {
-      console.warn('Migration warning (may already be up to date):', error);
+      const err = error as {stdout?: string; stderr?: string; message?: string};
+      console.error('Migration failed for', workerDbName);
+      if (err.stdout) console.error('stdout:', err.stdout);
+      if (err.stderr) console.error('stderr:', err.stderr);
+      if (!err.stdout && !err.stderr) console.error(err.message);
+      throw error;
    }
+
+    this.prisma = new PrismaClient({
+      datasources: {db: {url: url.toString()}},
+    });
+    await this.prisma.$connect();
  }

  /**
@@ -63,80 +116,32 @@ class TestDatabase {
  }

  /**
-   * Clean up database after each test
-   * Deletes all records in reverse order of dependencies
-   * Uses batched deletes to prevent memory issues with large datasets
-   * Retries on deadlock to handle race conditions with background event tracking
+   * Wipe all per-test data with a single TRUNCATE ... CASCADE statement.
+   * Roughly an order of magnitude faster than the 16 sequential deleteMany calls it replaces
+   * — TRUNCATE does not scan the rows it removes.
   */
  async cleanup() {
    if (!this.prisma) return;

+    const tables = TRUNCATE_TABLES.map(t => `"${t}"`).join(', ');
    const maxRetries = 3;
    let lastError: Error | null = null;

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
-        // Use a transaction to ensure all deletes happen atomically
-        // This prevents foreign key constraint violations and race conditions
-        await this.prisma.$transaction([
-          // Level 1: Delete deepest dependencies first
-          this.prisma.event.deleteMany(),
-          this.prisma.workflowStepExecution.deleteMany(),
-
-          // Level 2: Delete entities that depend on Level 1
-          this.prisma.email.deleteMany(),
-          this.prisma.workflowExecution.deleteMany(),
-
-          // Level 3: Delete workflow structure
-          this.prisma.workflowTransition.deleteMany(),
-          this.prisma.workflowStep.deleteMany(),
-          this.prisma.workflow.deleteMany(),
-
-          // Level 4: Delete campaigns and templates
-          this.prisma.campaign.deleteMany(),
-          this.prisma.template.deleteMany(),
-
-          // Level 5: Delete segment relationships
-          this.prisma.segmentMembership.deleteMany(),
-          this.prisma.segment.deleteMany(),
-
-          // Level 6: Delete contacts
-          this.prisma.contact.deleteMany(),
-
-          // Level 7: Delete domains
-          this.prisma.domain.deleteMany(),
-
-          // Level 8: Delete memberships (has FK to both user and project)
-          this.prisma.membership.deleteMany(),
-
-          // Level 9: Delete projects
-          this.prisma.project.deleteMany(),
-
-          // Level 10: Delete users last
-          this.prisma.user.deleteMany(),
-        ]);
-
-        // Success - exit retry loop
+        await this.prisma.$executeRawUnsafe(`TRUNCATE TABLE ${tables} RESTART IDENTITY CASCADE`);
        return;
      } catch (error) {
        lastError = error as Error;
-
-        // Check if this is a deadlock error (PostgreSQL error code 40P01)
        const isDeadlock = error instanceof Error && error.message?.includes('deadlock detected');
-
        if (isDeadlock && attempt < maxRetries) {
-          // Wait before retrying (exponential backoff)
-          const delay = Math.pow(2, attempt) * 50; // 100ms, 200ms, 400ms
-          await new Promise(resolve => setTimeout(resolve, delay));
+          await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 50));
          continue;
        }
-
-        // Not a deadlock or out of retries
        break;
      }
    }

-    // If we get here, all retries failed
    console.error(`Error cleaning up database after ${maxRetries} attempts:`, lastError);
    throw lastError;
  }
@@ -108,7 +108,9 @@ export class TestFactories {
  async createUser(options: UserFactoryOptions = {}) {
    const email = options.email || `user-${uniqueId()}@test.com`;
    const password = options.password || 'password123';
-    const hashedPassword = await bcrypt.hash(password, 10);
+    // Cost factor 4 is the bcrypt minimum — 2^6 = 64x fewer rounds than the production cost of 10.
+    // Test users don't need real-world hash strength.
+    const hashedPassword = await bcrypt.hash(password, 4);

    return this.prisma.user.create({
      data: {
@@ -1,39 +1,52 @@
-import { beforeAll, afterAll, afterEach, vi } from 'vitest';
-import { testDatabase } from './helpers/database';
+// IMPORTANT: this file runs before each test file's imports execute.
+// We rewrite DATABASE_URL and REDIS_URL here so per-worker isolation is
+// applied before any service module constructs a Prisma/Redis client.
+
 import dotenv from 'dotenv';
 import path from 'path';
+import {afterAll, afterEach, beforeAll, vi} from 'vitest';

-// Load environment variables from root .env file
-dotenv.config({ path: path.resolve(__dirname, '../.env') });
+dotenv.config({path: path.resolve(__dirname, '../.env')});
+
+// Vitest assigns each worker a 1-based pool id; defaults to "1" for single-worker runs.
+const workerId = process.env.VITEST_POOL_ID || '1';
+
+if (process.env.DATABASE_URL) {
+  const url = new URL(process.env.DATABASE_URL);
+  const baseDb = url.pathname.replace(/^\//, '') || 'plunk_test';
+  url.pathname = `/${baseDb}_w${workerId}`;
+  process.env.DATABASE_URL = url.toString();
+  // Mirror onto DIRECT_DATABASE_URL so prisma migrate uses the same worker DB.
+  if (process.env.DIRECT_DATABASE_URL) {
+    const direct = new URL(process.env.DIRECT_DATABASE_URL);
+    direct.pathname = `/${baseDb}_w${workerId}`;
+    process.env.DIRECT_DATABASE_URL = direct.toString();
+  }
+}
+
+if (process.env.REDIS_URL) {
+  const url = new URL(process.env.REDIS_URL);
+  url.pathname = `/${(parseInt(workerId, 10) - 1) % 16}`;
+  process.env.REDIS_URL = url.toString();
+}
+
+process.env.NODE_ENV = 'test';
+process.env.JWT_SECRET = process.env.JWT_SECRET || 'test-jwt-secret-key-for-testing';
+
+// Static import is safe: database.ts only reads env in initialize(), which runs
+// in beforeAll — well after the env mutations above.
+import {testDatabase} from './helpers/database';

-// Global test setup
 beforeAll(async () => {
-  // Initialize test database
  await testDatabase.initialize();
 });

 afterEach(async () => {
-  // Clear all mocks first
  vi.clearAllMocks();
-
-  // Restore real timers
  vi.useRealTimers();
-
-  // Clean up database after each test
-  // This must be last to ensure proper cleanup order
  await testDatabase.cleanup();
-
-  // Force garbage collection hint (if available in test environment)
-  if (global.gc) {
-    global.gc();
-  }
 });

 afterAll(async () => {
-  // Disconnect from database
  await testDatabase.disconnect();
 });
-
-// Set test environment variables
-process.env.NODE_ENV = 'test';
-process.env.JWT_SECRET = process.env.JWT_SECRET || 'test-jwt-secret-key-for-testing';
@@ -22,18 +22,19 @@ export default defineConfig({
    },
    testTimeout: 30000,
    hookTimeout: 30000,
-    // Memory optimization: Run tests in sequence to prevent memory issues
-    // This is critical for tests that create large datasets
+    // Each fork is a worker with an isolated Postgres database and Redis db-number
+    // (see test/setup.ts). That isolation is what lets us run files in parallel
+    // without the cross-test interference we used to hit with a shared DB.
    pool: 'forks',
    poolOptions: {
      forks: {
-        singleFork: true, // Run all tests in a single fork to limit memory
+        // Cap at 4 to stay within Postgres' default max_connections=100
+        // when each worker uses connection_limit=20.
+        maxForks: 4,
+        minForks: 1,
      },
    },
-    // Run tests sequentially to avoid database cleanup conflicts
-    fileParallelism: false,
-    // Limit concurrent test files to reduce memory pressure
-    maxConcurrency: 3,
+    maxConcurrency: 5,
    // Only include our test files, not dependency tests
    include: [
      'apps/**/__tests__/**/*.{test,spec}.{ts,tsx}',