From 07ae3b74794bd05e489957f9eab197efe91c0325 Mon Sep 17 00:00:00 2001 From: Yll Date: Sat, 27 Sep 2025 12:57:01 +0200 Subject: [PATCH 1/2] Add content filtering: Notifications when sensitive content is detected, .crowdcodeignore for ignoring sensitive files similar to .gitignore --- .gitignore | 3 +- package.json | 10 ++ src/actionsProvider.ts | 12 ++ src/extension.ts | 34 ++++ src/recording.ts | 46 +++++- src/security.ts | 351 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 451 insertions(+), 5 deletions(-) create mode 100644 src/security.ts diff --git a/.gitignore b/.gitignore index 0fbf82e..c04db8c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ dist vs-code-recorder .vscode-test/ out -test-workspace/vscode-recorder/* \ No newline at end of file +test-workspace/vscode-recorder/* +.DS_Store diff --git a/package.json b/package.json index 91f7560..cf14d9b 100644 --- a/package.json +++ b/package.json @@ -65,6 +65,16 @@ { "command": "crowd-code.consent", "title": "crowd-code: Manage Data Collection Consent" + }, + { + "command": "crowd-code.reloadSecurityFilter", + "title": "crowd-code: Reload Security Filter", + "icon": "$(refresh)" + }, + { + "command": "crowd-code.openCrowdCodeIgnore", + "title": "crowd-code: Open .crowdcodeignore File", + "icon": "$(file-text)" } ], "viewsContainers": { diff --git a/src/actionsProvider.ts b/src/actionsProvider.ts index dc658e9..4880728 100644 --- a/src/actionsProvider.ts +++ b/src/actionsProvider.ts @@ -188,6 +188,18 @@ export class ActionsProvider implements vscode.TreeDataProvider { ) items.push(consentStatus) + // Security Actions + const openCrowdCodeIgnore = new ActionItem( + 'Open .crowdcodeignore', + vscode.TreeItemCollapsibleState.None, + { + command: 'crowd-code.openCrowdCodeIgnore', + title: 'Open .crowdcodeignore File', + }, + 'file-text' + ) + items.push(openCrowdCodeIgnore) + return items } diff --git a/src/extension.ts b/src/extension.ts index fb065c3..b890565 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -20,6 +20,7 @@ import { initializeGitProvider, cleanupGitProvider } from './gitProvider' import * as fs from 'node:fs' import * as path from 'node:path' import { showConsentChangeDialog, ensureConsent, hasConsent } from './consent' +import { reloadSecurityFilter } from './security' export let statusBarItem: vscode.StatusBarItem export let extContext: vscode.ExtensionContext @@ -191,6 +192,39 @@ export async function activate(context: vscode.ExtensionContext): Promise }) ) + // Register security-related commands + context.subscriptions.push( + vscode.commands.registerCommand('crowd-code.reloadSecurityFilter', async () => { + await reloadSecurityFilter() + vscode.window.showInformationMessage('Security filter reloaded from .crowdcodeignore') + }) + ) + + context.subscriptions.push( + vscode.commands.registerCommand('crowd-code.openCrowdCodeIgnore', async () => { + const workspaceFolder = vscode.workspace.workspaceFolders?.[0] + if (!workspaceFolder) { + vscode.window.showWarningMessage('No workspace folder found') + return + } + const ignoreFilePath = path.join(workspaceFolder.uri.fsPath, '.crowdcodeignore') + + if (!fs.existsSync(ignoreFilePath)) { + const create = await vscode.window.showInformationMessage( + '.crowdcodeignore file not found. Create it?', + 'Create', 'Cancel' + ) + if (create === 'Create') { + // This will trigger creation on next recording start + vscode.window.showInformationMessage('File will be created when recording starts.') + } + return + } + + await vscode.window.showTextDocument(vscode.Uri.file(ignoreFilePath)) + }) + ) + context.subscriptions.push(vscode.workspace.onDidChangeConfiguration(onConfigurationChange)) diff --git a/src/recording.ts b/src/recording.ts index e6b30df..ba7df85 100644 --- a/src/recording.ts +++ b/src/recording.ts @@ -21,6 +21,7 @@ import { } from './utilities' import { type File, ChangeType, type CSVRowBuilder, type Change, type Recording, type ConsentStatus } from './types' import { extContext, statusBarItem, actionsProvider } from './extension' +import { initializeSecurity, isFileIgnored, containsSensitiveContent, showFilteredContentNotification, getFilteredPlaceholder } from './security' export const commands = { openSettings: 'crowd-code.openSettings', @@ -91,10 +92,10 @@ export function buildCsvRow({ } /** - * Checks if the current file being edited is within the configured export path. + * Checks if the current file being edited is within the configured export path or should be ignored for security reasons. * This is used to determine if the current file should be recorded or not. * - * @returns {boolean} `true` if the current file is within the export path, `false` otherwise. + * @returns {boolean} `true` if the current file is within the export path or should be ignored, `false` otherwise. */ export function isCurrentFileExported(): boolean { const editor = vscode.window.activeTextEditor @@ -103,7 +104,19 @@ export function isCurrentFileExported(): boolean { if (!editor || !filename || !exportPath) { return false } - return filename.startsWith(exportPath) + + // Check if file is in export path + if (filename.startsWith(exportPath)) { + return true + } + + // Check if file should be ignored for security reasons + if (isFileIgnored(filename)) { + showFilteredContentNotification('File ignored by .crowdcodeignore', path.basename(filename)) + return true + } + + return false } const onChangeSubscription = vscode.workspace.onDidChangeTextDocument(event => { @@ -114,16 +127,38 @@ const onChangeSubscription = vscode.workspace.onDidChangeTextDocument(event => { if (isCurrentFileExported()) { return } + const editor = vscode.window.activeTextEditor if (editor && event.document === editor.document) { for (const change of event.contentChanges) { recording.sequence++ + + let textToRecord = change.text + + // Check for sensitive content in the change + const sensitiveCheck = containsSensitiveContent(change.text) + if (sensitiveCheck.isSensitive) { + const matchTypes = sensitiveCheck.matches.map(match => { + if (match.startsWith('sk-') || match.startsWith('pk-')) return 'API key' + if (match.startsWith('eyJ')) return 'JWT token' + if (match.includes('PRIVATE KEY')) return 'Private key' + if (match.includes('://') && match.includes('@')) return 'Connection string' + return 'Sensitive pattern' + }).join(', ') + + textToRecord = getFilteredPlaceholder(matchTypes) + showFilteredContentNotification( + 'Sensitive content detected', + `${matchTypes} - ${sensitiveCheck.matches.length} pattern(s) found` + ) + } + addToFileQueue( buildCsvRow({ sequence: recording.sequence, rangeOffset: change.rangeOffset, rangeLength: change.rangeLength, - text: change.text, + text: textToRecord, }) ) appendToFile() @@ -150,6 +185,9 @@ export async function startRecording(): Promise { logToOutput('Already recording', 'info') return } + + // Initialize security system + await initializeSecurity() const exportPath = getExportPath() if (!exportPath) { return diff --git a/src/security.ts b/src/security.ts new file mode 100644 index 0000000..36214bc --- /dev/null +++ b/src/security.ts @@ -0,0 +1,351 @@ +import * as fs from 'node:fs' +import * as path from 'node:path' +import * as vscode from 'vscode' +import { logToOutput } from './utilities' + +interface SecurityFilter { + filePatterns: string[] + contentPatterns: RegExp[] +} + +let securityFilter: SecurityFilter | null = null +let lastNotificationTime = 0 +const NOTIFICATION_THROTTLE_MS = 5000 // Only show notifications every 5 seconds + +// Default .crowdcodeignore template +const DEFAULT_CROWDCODE_IGNORE = `# crowd-code security ignore patterns +# This file prevents sensitive files and content from being recorded + +# Environment and configuration files +.env +.env.* +*.env +.envrc + +# Secret and credential files +**/secrets/** +**/secret/** +**/user-secrets/** +**/*secret* +**/*secrets* +**/*key* +**/*keys* +**/*token* +**/*tokens* +**/*password* +**/*passwords* +**/*credential* +**/*credentials* + +# Certificate and key files +*.pem +*.key +*.p12 +*.pfx +*.crt +*.cer +*.der +*.csr +*.jks +*.keystore +*.truststore + +# SSH and GPG files +**/.ssh/** +**/.gnupg/** +**/id_rsa* +**/id_ed25519* +**/id_ecdsa* +**/known_hosts* + +# Cloud provider credentials +**/.aws/** +**/.azure/** +**/.gcp/** +**/gcloud/** +**/.kube/config* + +# Application-specific secret files +**/appsettings.*.json +**/secrets.json +**/config/secrets.yaml +**/config/credentials.yaml + +# Backup and temporary files that might contain secrets +*.bak +*.backup +*.old +*.tmp +*~ +.#* +` + +/** + * Content patterns for detecting sensitive information + */ +const SENSITIVE_CONTENT_PATTERNS = [ + // API Keys - OpenAI, Anthropic, etc. + /sk-[a-zA-Z0-9]{32,}/g, + /pk-[a-zA-Z0-9]{32,}/g, + /sk_test_[a-zA-Z0-9_-]{24,}/g, + /sk_live_[a-zA-Z0-9_-]{24,}/g, + /rk_[a-zA-Z0-9_-]{24,}/g, + + // JWT Tokens (starts with ey and has two dots) + /eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g, + + // AWS Keys + /AKIA[0-9A-Z]{16}/g, + /ASIA[0-9A-Z]{16}/g, + /AROA[0-9A-Z]{16}/g, + + // Google API Keys + /AIza[0-9A-Za-z_-]{35}/g, + + // GitHub Tokens + /gh[pousr]_[A-Za-z0-9_]{36}/g, + /github_pat_[a-zA-Z0-9_]{82}/g, + + // Generic patterns + // Long base64 strings (likely secrets) + /[A-Za-z0-9+/]{40,}={0,2}/g, + + // Password/key assignments in code + /(password|passwd|pwd|secret|key|token|api_key|apikey)\s*[:=]\s*['""][^'""\s]{8,}['"']/gi, + /(password|passwd|pwd|secret|key|token|api_key|apikey)\s*[:=]\s*[^\s'"",;]{12,}/gi, + + // Private keys + /-----BEGIN[A-Z\s]+PRIVATE KEY-----/g, + /-----BEGIN RSA PRIVATE KEY-----/g, + /-----BEGIN DSA PRIVATE KEY-----/g, + /-----BEGIN EC PRIVATE KEY-----/g, + /-----BEGIN OPENSSH PRIVATE KEY-----/g, + + // Connection strings + /mongodb(\+srv)?:\/\/[^:\s]+:[^@\s]+@[^\s]+/gi, + /postgres(ql)?:\/\/[^:\s]+:[^@\s]+@[^\s]+/gi, + /mysql:\/\/[^:\s]+:[^@\s]+@[^\s]+/gi, + + // URLs with credentials + /https?:\/\/[^:\s]+:[^@\s]+@[^\s]+/gi, +] + +/** + * Initialize the security filter system + */ +export async function initializeSecurity(): Promise { + try { + await ensureCrowdCodeIgnoreFile() + await loadSecurityFilter() + logToOutput('Security filter system initialized', 'info') + } catch (error) { + logToOutput(`Error initializing security filter: ${error}`, 'error') + } +} + +/** + * Ensure .crowdcodeignore file exists in workspace root + */ +async function ensureCrowdCodeIgnoreFile(): Promise { + const workspaceFolder = vscode.workspace.workspaceFolders?.[0] + if (!workspaceFolder) { + return + } + + const ignoreFilePath = path.join(workspaceFolder.uri.fsPath, '.crowdcodeignore') + + if (!fs.existsSync(ignoreFilePath)) { + // Scan workspace for existing sensitive files and add them + const existingSensitiveFiles = await scanForSensitiveFiles(workspaceFolder.uri.fsPath) + + let ignoreContent = DEFAULT_CROWDCODE_IGNORE + + if (existingSensitiveFiles.length > 0) { + ignoreContent += '\n# Auto-detected sensitive files in this workspace:\n' + existingSensitiveFiles.forEach(file => { + ignoreContent += `${file}\n` + }) + } + + await fs.promises.writeFile(ignoreFilePath, ignoreContent, 'utf-8') + logToOutput(`Created .crowdcodeignore with ${existingSensitiveFiles.length} auto-detected sensitive files`, 'info') + + // Show notification to user + vscode.window.showInformationMessage( + `Created .crowdcodeignore file with security patterns. Found ${existingSensitiveFiles.length} existing sensitive files.`, + 'View File' + ).then((selection: string | undefined) => { + if (selection === 'View File') { + vscode.window.showTextDocument(vscode.Uri.file(ignoreFilePath)) + } + }) + } +} + +/** + * Scan workspace for existing sensitive files + */ +async function scanForSensitiveFiles(workspacePath: string): Promise { + const sensitiveFiles: string[] = [] + const sensitivePatterns = [ + '.env', '.env.*', '*.env', + '*secret*', '*key*', '*token*', '*password*', + '*.pem', '*.key', '*.p12', '*.pfx', + 'secrets.json', 'credentials.yaml' + ] + + try { + const files = await vscode.workspace.findFiles('**/*', '**/node_modules/**', 1000) + + for (const file of files) { + const relativePath = path.relative(workspacePath, file.fsPath) + + for (const pattern of sensitivePatterns) { + if (isGlobMatch(relativePath, pattern)) { + sensitiveFiles.push(relativePath) + break + } + } + } + } catch (error) { + logToOutput(`Error scanning for sensitive files: ${error}`, 'error') + } + + return [...new Set(sensitiveFiles)] // Remove duplicates +} + +/** + * Load and parse .crowdcodeignore file + */ +async function loadSecurityFilter(): Promise { + const workspaceFolder = vscode.workspace.workspaceFolders?.[0] + if (!workspaceFolder) { + securityFilter = { filePatterns: [], contentPatterns: SENSITIVE_CONTENT_PATTERNS } + return + } + + const ignoreFilePath = path.join(workspaceFolder.uri.fsPath, '.crowdcodeignore') + + try { + if (fs.existsSync(ignoreFilePath)) { + const content = await fs.promises.readFile(ignoreFilePath, 'utf-8') + const patterns = parseIgnoreFile(content) + securityFilter = { + filePatterns: patterns, + contentPatterns: SENSITIVE_CONTENT_PATTERNS + } + } else { + securityFilter = { filePatterns: [], contentPatterns: SENSITIVE_CONTENT_PATTERNS } + } + } catch (error) { + logToOutput(`Error loading .crowdcodeignore: ${error}`, 'error') + securityFilter = { filePatterns: [], contentPatterns: SENSITIVE_CONTENT_PATTERNS } + } +} + +/** + * Parse .crowdcodeignore file content into patterns + */ +function parseIgnoreFile(content: string): string[] { + return content + .split('\n') + .map(line => line.trim()) + .filter(line => line && !line.startsWith('#')) + .filter(line => line.length > 0) +} + +/** + * Check if a file should be ignored based on .crowdcodeignore patterns + */ +export function isFileIgnored(filePath: string): boolean { + if (!securityFilter) { + return false + } + + const workspaceFolder = vscode.workspace.workspaceFolders?.[0] + if (!workspaceFolder) { + return false + } + + const relativePath = path.relative(workspaceFolder.uri.fsPath, filePath) + + return securityFilter.filePatterns.some(pattern => isGlobMatch(relativePath, pattern)) +} + +/** + * Check if content contains sensitive information + */ +export function containsSensitiveContent(text: string): { isSensitive: boolean; matches: string[] } { + if (!securityFilter || !text || text.trim().length === 0) { + return { isSensitive: false, matches: [] } + } + + const matches: string[] = [] + + for (const pattern of securityFilter.contentPatterns) { + const patternMatches = text.match(pattern) + if (patternMatches) { + matches.push(...patternMatches) + } + } + + return { + isSensitive: matches.length > 0, + matches: [...new Set(matches)] // Remove duplicates + } +} + +/** + * Show throttled notification about filtered content + */ +export function showFilteredContentNotification(reason: string, details: string = ''): void { + const now = Date.now() + if (now - lastNotificationTime < NOTIFICATION_THROTTLE_MS) { + return // Throttle notifications + } + + lastNotificationTime = now + const message = details + ? `🔒 Sensitive content filtered: ${reason}. Details: ${details.substring(0, 50)}${details.length > 50 ? '...' : ''}` + : `🔒 Sensitive content filtered: ${reason}` + + vscode.window.showWarningMessage(message, 'View .crowdcodeignore').then((selection: string | undefined) => { + if (selection === 'View .crowdcodeignore') { + const workspaceFolder = vscode.workspace.workspaceFolders?.[0] + if (workspaceFolder) { + const ignoreFilePath = path.join(workspaceFolder.uri.fsPath, '.crowdcodeignore') + vscode.window.showTextDocument(vscode.Uri.file(ignoreFilePath)) + } + } + }) +} + +/** + * Simple glob pattern matching + */ +function isGlobMatch(text: string, pattern: string): boolean { + // Convert glob pattern to regex + const regexPattern = pattern + .replace(/\./g, '\\.') + .replace(/\*/g, '.*') + .replace(/\?/g, '.') + .replace(/\*\*/g, '.*') + + const regex = new RegExp(`^${regexPattern}$`, 'i') + return regex.test(text) || regex.test(text.replace(/\\/g, '/')) +} + +/** + * Get placeholder text for filtered content + */ +export function getFilteredPlaceholder(reason: string): string { + return `[SENSITIVE_CONTENT_FILTERED: ${reason}]` +} + +/** + * Reload security filter (for when .crowdcodeignore is modified) + */ +export async function reloadSecurityFilter(): Promise { + await loadSecurityFilter() + logToOutput('Security filter reloaded', 'info') +} \ No newline at end of file From d9d57419d2b9ddbcf3c438bbf5fd4c366d8e11b7 Mon Sep 17 00:00:00 2001 From: Yll Date: Sat, 27 Sep 2025 21:55:19 +0200 Subject: [PATCH 2/2] Change notification for ignored files and patterns to only once per session --- src/recording.ts | 2 +- src/security.ts | 31 +++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/recording.ts b/src/recording.ts index ba7df85..3859053 100644 --- a/src/recording.ts +++ b/src/recording.ts @@ -112,7 +112,7 @@ export function isCurrentFileExported(): boolean { // Check if file should be ignored for security reasons if (isFileIgnored(filename)) { - showFilteredContentNotification('File ignored by .crowdcodeignore', path.basename(filename)) + showFilteredContentNotification('File ignored by .crowdcodeignore', path.basename(filename), filename) return true } diff --git a/src/security.ts b/src/security.ts index 36214bc..cf10e66 100644 --- a/src/security.ts +++ b/src/security.ts @@ -12,6 +12,10 @@ let securityFilter: SecurityFilter | null = null let lastNotificationTime = 0 const NOTIFICATION_THROTTLE_MS = 5000 // Only show notifications every 5 seconds +// Track which files have already shown notifications this session +const notifiedFilesThisSession = new Set() +const notifiedContentPatternsThisSession = new Set() + // Default .crowdcodeignore template const DEFAULT_CROWDCODE_IGNORE = `# crowd-code security ignore patterns # This file prevents sensitive files and content from being recorded @@ -296,14 +300,37 @@ export function containsSensitiveContent(text: string): { isSensitive: boolean; } /** - * Show throttled notification about filtered content + * Show throttled notification about filtered content - only once per session per file/content type */ -export function showFilteredContentNotification(reason: string, details: string = ''): void { +export function showFilteredContentNotification(reason: string, details: string = '', filePath?: string): void { const now = Date.now() if (now - lastNotificationTime < NOTIFICATION_THROTTLE_MS) { return // Throttle notifications } + // Create a unique key for this notification type + let notificationKey: string + if (filePath && reason.includes('File ignored')) { + // For file-based notifications, use the file path + notificationKey = `file:${filePath}` + } else { + // For content-based notifications, use the reason + first part of details + notificationKey = `content:${reason}:${details.split(',')[0]}` + } + + // Check if we've already shown this notification this session + if (filePath && reason.includes('File ignored')) { + if (notifiedFilesThisSession.has(notificationKey)) { + return // Already notified about this file this session + } + notifiedFilesThisSession.add(notificationKey) + } else { + if (notifiedContentPatternsThisSession.has(notificationKey)) { + return // Already notified about this content pattern this session + } + notifiedContentPatternsThisSession.add(notificationKey) + } + lastNotificationTime = now const message = details ? `🔒 Sensitive content filtered: ${reason}. Details: ${details.substring(0, 50)}${details.length > 50 ? '...' : ''}`