All files / storage db.ts

91.12% Statements 154/169
78.57% Branches 55/70
94.44% Functions 17/18
94.52% Lines 138/146

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378                  30x     42x 42x 31x   11x     42x                   42x 42x   42x   31x                 42x 42x 42x 42x 42x   42x               42x 42x 42x 42x   42x   42x         42x 42x 42x 42x 42x 42x 42x           42x 42x           42x   42x 42x   42x 42x       528x       528x 515x     528x     528x   528x 520x 14975x 14975x 14975x   520x               30x         516x     516x 1x     1x 1x 1x 1x 1x           518x 518x     518x     518x 1x 1x 1x 1x 1x     1x 1x         518x     518x 1x 1x 1x 1x 1x           1x 1x         518x       518x 3x 3x 3x   3x 2x     3x 3x               520x   520x           520x   3x   3x 3x     3x     3x 3x 3x         3x 3x               520x   520x   520x       520x   3x   3x 3x   3x     3x 3x 3x         3x 3x           520x     520x 520x       520x 3x     3x 3x 3x     3x             520x   520x     520x 520x                                             520x           520x   4x     4x   4x 4x 4x 4x     4x 4x   4x 4x 4x                     4x           4x            
import { createHash } from "node:crypto";
import { statSync } from "node:fs";
import { totalmem } from "node:os";
import { join } from "node:path";
import { deflateSync, inflateSync } from "node:zlib";
import Database from "better-sqlite3-multiple-ciphers";
import { htmlToText } from "./html-to-text.js";
import { MIGRATIONS, SCHEMA_VERSION } from "./schema.js";
 
const DATA_DIR = process.env.STORK_DATA_DIR || "./data";
 
export function attachBlobsDb(db: Database.Database, blobsPath: string, vaultKey?: Buffer): void {
	const escapedPath = blobsPath.replace(/'/g, "''");
	if (vaultKey) {
		db.exec(`ATTACH DATABASE '${escapedPath}' AS blobs KEY "x'${vaultKey.toString("hex")}'"`);
	} else {
		db.exec(`ATTACH DATABASE '${escapedPath}' AS blobs`);
	}
	// Ensure the blobs table exists in the attached DB (idempotent)
	db.exec(
		`CREATE TABLE IF NOT EXISTS blobs.attachment_blobs (content_hash TEXT PRIMARY KEY, data BLOB NOT NULL)`,
	);
}
 
export function openDatabase(
	filename = "stork.db",
	dataDir = DATA_DIR,
	vaultKey?: Buffer,
): Database.Database {
	const dbPath = join(dataDir, filename);
	const db = new Database(dbPath);
 
	if (vaultKey) {
		// Supply raw 32-byte vault key as hex to SQLCipher (bypasses SQLCipher's PBKDF2)
		db.exec(`PRAGMA key = "x'${vaultKey.toString("hex")}'";`);
	}
 
	// Scale mmap_size based on DB file size and available RAM.
	// Cap at 50% of total system RAM so we don't crowd out other processes,
	// but don't exceed the file size (mapping more than the file wastes VA space).
	// Minimum 256 MB for small DBs. On a machine with 32 GB RAM and a 16 GB DB,
	// this maps the entire file; on a machine with 8 GB RAM it maps up to 4 GB
	// (25% of the DB — still far better than the old fixed 256 MB / 1.5%).
	let mmapSize = 268435456; // 256 MB default
	try {
		const fileSize = statSync(dbPath).size;
		const ramCap = Math.floor(totalmem() / 2);
		mmapSize = Math.min(fileSize, ramCap);
		// Floor at 256 MB for small DBs
		Eif (mmapSize < 268435456) mmapSize = 268435456;
	} catch {
		// File may not exist yet (first run)
	}
 
	// Scale page cache to DB size: 64 MB baseline, up to 512 MB for large DBs.
	// For a 16 GB DB, 512 MB cache keeps ~3% of pages resident (vs 0.4% at 64 MB).
	// Combined with mmap, this ensures indices and recent message pages stay hot.
	let cacheSizeKB = 65536; // 64 MB
	try {
		const fileSize = statSync(dbPath).size;
		Iif (fileSize > 2 * 1024 * 1024 * 1024)
			cacheSizeKB = 524288; // 512 MB for >2 GB DBs
		else if (IfileSize > 512 * 1024 * 1024)
			cacheSizeKB = 262144; // 256 MB for >512 MB DBs
		else if (IfileSize > 128 * 1024 * 1024) cacheSizeKB = 131072; // 128 MB for >128 MB DBs
	} catch {
		// First run — use default
	}
 
	db.exec("PRAGMA journal_mode = WAL");
	db.exec("PRAGMA foreign_keys = ON");
	db.exec("PRAGMA busy_timeout = 30000");
	db.exec(`PRAGMA cache_size = -${cacheSizeKB}`);
	db.exec("PRAGMA temp_store = MEMORY");
	db.exec("PRAGMA synchronous = NORMAL");
	db.exec(`PRAGMA mmap_size = ${mmapSize}`);
 
	// Checkpoint and truncate the WAL before anything else. After a crash
	// the WAL can be very large (GBs for a big mailbox); leaving it un-
	// checkpointed means the first writer races against readers and both
	// suffer. TRUNCATE mode resets the WAL file to zero bytes.
	try {
		db.exec("PRAGMA wal_checkpoint(TRUNCATE)");
	} catch {
		// Checkpoint can fail if another connection holds a read lock — safe to skip
	}
 
	// Let SQLite analyse tables and pick better query plans for the current data
	db.exec("PRAGMA optimize");
 
	const blobsPath = join(dataDir, `${filename.replace(/\.db$/, "")}-blobs.db`);
	attachBlobsDb(db, blobsPath, vaultKey);
 
	ensureSchema(db);
	return db;
}
 
export function ensureSchema(db: Database.Database): void {
	const hasTable = db
		.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='schema_version'")
		.get();
 
	if (!hasTable) {
		db.exec(MIGRATIONS[0]);
	}
 
	const row = db.prepare("SELECT version FROM schema_version").get() as
		| { version: number }
		| undefined;
	const currentVersion = row?.version ?? 0;
 
	if (currentVersion < SCHEMA_VERSION) {
		for (let i = currentVersion; i < SCHEMA_VERSION; i++) {
			const hook = PRE_MIGRATION_HOOKS[i + 1];
			if (hook) hook(db);
			db.exec(MIGRATIONS[i]);
		}
		db.prepare("UPDATE schema_version SET version = ?").run(SCHEMA_VERSION);
	}
}
 
/**
 * Pre-migration hooks: JS code that must run before a specific migration's SQL.
 * Keyed by target schema version (1-indexed, matching SCHEMA_VERSION).
 */
const PRE_MIGRATION_HOOKS: Record<number, (db: Database.Database) => void> = {
	// v21: Hash existing inline attachment data into attachment_blobs before the
	// SQL migration drops the data column from attachments.  Note: array index
	// aligns with target schema version (1-indexed).
	21: (db) => {
		const rows = db
			.prepare("SELECT id, data FROM attachments WHERE data IS NOT NULL AND content_hash IS NULL")
			.all() as Array<{ id: number; data: Buffer }>;
		if (rows.length === 0) return;
		const insertBlob = db.prepare(
			"INSERT OR IGNORE INTO attachment_blobs (content_hash, data) VALUES (?, ?)",
		);
		const setHash = db.prepare("UPDATE attachments SET content_hash = ? WHERE id = ?");
		for (const row of rows) {
			const hash = createHash("sha256").update(row.data).digest("hex");
			insertBlob.run(hash, row.data);
			setHash.run(hash, row.id);
		}
	},
 
	// v23: Batch-compress existing html_body, raw_headers, and attachment blobs.
	23: (db) => {
		const BATCH_SIZE = 500;
		const compressOpts = { level: 6 };
 
		// Compress html_body — only rows where it's a non-null TEXT (typeof string)
		const htmlRows = db
			.prepare("SELECT id, html_body FROM messages WHERE html_body IS NOT NULL")
			.all() as Array<{ id: number; html_body: string | Buffer }>;
		if (htmlRows.length > 0) {
			const updateHtml = db.prepare("UPDATE messages SET html_body = ? WHERE id = ?");
			const htmlTxn = db.transaction((batch: typeof htmlRows) => {
				for (const row of batch) {
					Iif (typeof row.html_body !== "string") continue; // already compressed
					updateHtml.run(deflateSync(Buffer.from(row.html_body, "utf-8"), compressOpts), row.id);
				}
			});
			for (let i = 0; i < htmlRows.length; i += BATCH_SIZE) {
				htmlTxn(htmlRows.slice(i, i + BATCH_SIZE));
			}
		}
 
		// Compress raw_headers
		const headerRows = db
			.prepare("SELECT id, raw_headers FROM messages WHERE raw_headers IS NOT NULL")
			.all() as Array<{ id: number; raw_headers: string | Buffer }>;
		if (headerRows.length > 0) {
			const updateHeaders = db.prepare("UPDATE messages SET raw_headers = ? WHERE id = ?");
			const headerTxn = db.transaction((batch: typeof headerRows) => {
				for (const row of batch) {
					Iif (typeof row.raw_headers !== "string") continue;
					updateHeaders.run(
						deflateSync(Buffer.from(row.raw_headers, "utf-8"), compressOpts),
						row.id,
					);
				}
			});
			for (let i = 0; i < headerRows.length; i += BATCH_SIZE) {
				headerTxn(headerRows.slice(i, i + BATCH_SIZE));
			}
		}
 
		// Compress attachment blobs
		const blobRows = db.prepare("SELECT content_hash, data FROM attachment_blobs").all() as Array<{
			content_hash: string;
			data: Buffer;
		}>;
		if (blobRows.length > 0) {
			const updateBlob = db.prepare("UPDATE attachment_blobs SET data = ? WHERE content_hash = ?");
			const blobTxn = db.transaction((batch: typeof blobRows) => {
				for (const row of batch) {
					// Skip if already compressed (starts with zlib header 0x78)
					if (row.data.length >= 2 && row.data[0] === 0x78) continue;
					updateBlob.run(deflateSync(row.data, compressOpts), row.content_hash);
				}
			});
			for (let i = 0; i < blobRows.length; i += BATCH_SIZE) {
				blobTxn(blobRows.slice(i, i + BATCH_SIZE));
			}
		}
	},
 
	// v24: Backfill text_body for HTML-only messages (html_body present, text_body NULL).
	// Decompresses html_body, strips HTML to plain text, and stores in text_body for FTS5 indexing.
	24: (db) => {
		const BATCH_SIZE = 500;
 
		const rows = db
			.prepare(
				"SELECT id, html_body FROM messages WHERE text_body IS NULL AND html_body IS NOT NULL",
			)
			.all() as Array<{ id: number; html_body: string | Buffer }>;
 
		if (rows.length === 0) return;
 
		const updateText = db.prepare("UPDATE messages SET text_body = ? WHERE id = ?");
 
		const txn = db.transaction((batch: typeof rows) => {
			for (const row of batch) {
				// Decompress html_body if it's a Buffer (compressed), otherwise use string directly
				const html =
					typeof row.html_body === "string"
						? row.html_body
						: inflateSync(row.html_body).toString("utf-8");
				const text = htmlToText(html);
				Eif (text) {
					updateText.run(text, row.id);
				}
			}
		});
 
		for (let i = 0; i < rows.length; i += BATCH_SIZE) {
			txn(rows.slice(i, i + BATCH_SIZE));
		}
	},
 
	// v25: Add html_text_body column and backfill from html_body.
	// This gives every message with HTML a dedicated searchable text extraction,
	// separate from text_body (the original plain-text MIME part).
	25: (db) => {
		db.exec("ALTER TABLE messages ADD COLUMN html_text_body TEXT");
 
		const BATCH_SIZE = 500;
 
		const rows = db
			.prepare("SELECT id, html_body FROM messages WHERE html_body IS NOT NULL")
			.all() as Array<{ id: number; html_body: string | Buffer }>;
 
		if (rows.length === 0) return;
 
		const updateHtmlText = db.prepare("UPDATE messages SET html_text_body = ? WHERE id = ?");
 
		const txn = db.transaction((batch: typeof rows) => {
			for (const row of batch) {
				const html =
					typeof row.html_body === "string"
						? row.html_body
						: inflateSync(row.html_body).toString("utf-8");
				const text = htmlToText(html);
				Eif (text) {
					updateHtmlText.run(text, row.id);
				}
			}
		});
 
		for (let i = 0; i < rows.length; i += BATCH_SIZE) {
			txn(rows.slice(i, i + BATCH_SIZE));
		}
	},
 
	// v27: Move attachment_blobs from main DB to the attached blobs DB.
	27: (db) => {
		const hasTable = db
			.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='attachment_blobs'")
			.get();
		Iif (!hasTable) return;
		const rows = db.prepare("SELECT content_hash, data FROM attachment_blobs").all() as Array<{
			content_hash: string;
			data: Buffer;
		}>;
		if (rows.length === 0) return;
		const insert = db.prepare(
			"INSERT OR IGNORE INTO blobs.attachment_blobs (content_hash, data) VALUES (?, ?)",
		);
		const txn = db.transaction(() => {
			for (const row of rows) {
				insert.run(row.content_hash, row.data);
			}
		});
		txn();
	},
 
	// v30: Backfill date column on message_labels before the SQL migration adds the
	// covering index. Batch the UPDATE so the WAL doesn't grow unboundedly —
	// important on 5+ GB databases.
	30: (db) => {
		const BATCH_SIZE = 50_000;
 
		const maxRow = db.prepare("SELECT MAX(rowid) AS max_rowid FROM message_labels").get() as
			| { max_rowid: number | null }
			| undefined;
		const maxRowid = maxRow?.max_rowid ?? 0;
		Eif (maxRowid === 0) return;
 
		const updateBatch = db.prepare(`
			UPDATE message_labels
			SET date = (SELECT m.date FROM messages m WHERE m.id = message_labels.message_id)
			WHERE rowid BETWEEN ? AND ?
			  AND date IS NULL
		`);
 
		const batchTxn = db.transaction((lo: number, hi: number) => {
			updateBatch.run(lo, hi);
		});
 
		for (let lo = 1; lo <= maxRowid; lo += BATCH_SIZE) {
			batchTxn(lo, lo + BATCH_SIZE - 1);
		}
	},
 
	// v26: Rename IMAP-sourced labels from folder leaf name to full folder path.
	// Multiple folders can share a leaf name (e.g., "Archive/Old/INBOX" and "INBOX"
	// both have name="INBOX"). This migration creates path-based labels and re-links
	// message_labels so each message points to its folder's full-path label.
	26: (db) => {
		const folders = db.prepare("SELECT id, path, name FROM folders").all() as Array<{
			id: number;
			path: string;
			name: string;
		}>;
 
		if (folders.length === 0) return;
 
		const upsertLabel = db.prepare(
			"INSERT INTO labels (name, source) VALUES (?, 'imap') ON CONFLICT(name) DO NOTHING",
		);
		const getLabelId = db.prepare("SELECT id FROM labels WHERE name = ?");
 
		const txn = db.transaction(() => {
			for (const folder of folders) {
				Iif (folder.path === folder.name) continue;
				upsertLabel.run(folder.path);
			}
 
			for (const folder of folders) {
				Iif (folder.path === folder.name) continue;
 
				const oldLabel = getLabelId.get(folder.name) as { id: number } | undefined;
				const newLabel = getLabelId.get(folder.path) as { id: number } | undefined;
				Eif (!oldLabel || !newLabel) continue;
 
				db.prepare(`
					UPDATE OR IGNORE message_labels
					SET label_id = ?
					WHERE label_id = ? AND message_id IN (
						SELECT id FROM messages WHERE folder_id = ?
					)
				`).run(newLabel.id, oldLabel.id, folder.id);
			}
 
			db.exec(`
				DELETE FROM labels WHERE source = 'imap'
				AND id NOT IN (SELECT DISTINCT label_id FROM message_labels)
				AND name NOT IN (SELECT path FROM folders)
			`);
		});
		txn();
	},
};
 
export type { Database };
export default Database;