diff --git a/src/app/api/cron/cleanup-stale-analyses/route.ts b/src/app/api/cron/cleanup-stale-analyses/route.ts index 831e1e751..8e2b8a79b 100644 --- a/src/app/api/cron/cleanup-stale-analyses/route.ts +++ b/src/app/api/cron/cleanup-stale-analyses/route.ts @@ -1,12 +1,20 @@ import { NextResponse } from 'next/server'; +import { captureException } from '@sentry/nextjs'; import { cleanupStaleAnalyses } from '@/lib/security-agent/db/security-analysis'; import { sentryLogger } from '@/lib/utils.server'; -import { CRON_SECRET } from '@/lib/config.server'; +import { CRON_SECRET, SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL } from '@/lib/config.server'; if (!CRON_SECRET) { throw new Error('CRON_SECRET is not configured in environment variables'); } +const log = sentryLogger('security-agent:cron-cleanup', 'info'); +const warn = sentryLogger('security-agent:cron-cleanup', 'warning'); +const cronWarn = sentryLogger('cron', 'warning'); + +/** Threshold for alerting on abnormally high stale analysis counts */ +const STALE_ANOMALY_THRESHOLD = 10; + /** * Cron job endpoint to cleanup stale security analyses * @@ -26,26 +34,56 @@ export async function GET(request: Request) { // Vercel sends: Authorization: Bearer const expectedAuth = `Bearer ${CRON_SECRET}`; if (authHeader !== expectedAuth) { - sentryLogger( - 'cron', - 'warning' - )( + cronWarn( 'SECURITY: Invalid CRON job authorization attempt: ' + (authHeader ? 'Invalid authorization header' : 'Missing authorization header') ); return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); } - // Execute cleanup - mark analyses running for more than 30 minutes as failed - const cleanedCount = await cleanupStaleAnalyses(30); + try { + // Execute cleanup - mark analyses running for more than 30 minutes as failed + const cleanedCount = await cleanupStaleAnalyses(30); - if (cleanedCount > 0) { - sentryLogger('cron', 'info')(`Cleaned up ${cleanedCount} stale security analyses`); - } + if (cleanedCount > 0) { + log(`Cleaned up ${cleanedCount} stale security analyses`); + } + + // Alert if abnormally high number of stale analyses indicates a systemic issue + if (cleanedCount > STALE_ANOMALY_THRESHOLD) { + warn( + `Abnormally high stale analysis count: ${cleanedCount} (threshold: ${STALE_ANOMALY_THRESHOLD}). This may indicate a systemic problem with analysis completion.`, + { cleanedCount, threshold: STALE_ANOMALY_THRESHOLD } + ); + } + + // Send heartbeat to BetterStack on success + if (SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL) { + await fetch(SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL, { signal: AbortSignal.timeout(5000) }).catch(() => {}); + } - return NextResponse.json({ - success: true, - cleanedCount, - timestamp: new Date().toISOString(), - }); + return NextResponse.json({ + success: true, + cleanedCount, + timestamp: new Date().toISOString(), + }); + } catch (error) { + captureException(error, { + tags: { endpoint: 'cron/cleanup-stale-analyses' }, + }); + + // Send failure heartbeat to BetterStack + if (SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL) { + await fetch(`${SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL}/fail`, { signal: AbortSignal.timeout(5000) }).catch(() => {}); + } + + return NextResponse.json( + { + success: false, + error: 'Failed to cleanup stale analyses', + message: error instanceof Error ? error.message : 'Unknown error', + }, + { status: 500 } + ); + } } diff --git a/src/app/api/cron/sync-security-alerts/route.ts b/src/app/api/cron/sync-security-alerts/route.ts index 5bf1062ff..ebf04e55a 100644 --- a/src/app/api/cron/sync-security-alerts/route.ts +++ b/src/app/api/cron/sync-security-alerts/route.ts @@ -1,11 +1,13 @@ import { NextResponse } from 'next/server'; import type { NextRequest } from 'next/server'; import { captureException } from '@sentry/nextjs'; -import { CRON_SECRET } from '@/lib/config.server'; +import { CRON_SECRET, SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL } from '@/lib/config.server'; import { runFullSync } from '@/lib/security-agent/services/sync-service'; +import { sentryLogger } from '@/lib/utils.server'; -// TODO: Create BetterStack heartbeat for security alerts sync -// const BETTERSTACK_HEARTBEAT_URL = 'https://uptime.betterstack.com/api/v1/heartbeat/...'; +const log = sentryLogger('security-agent:cron-sync', 'info'); +const cronWarn = sentryLogger('cron', 'warning'); +const logError = sentryLogger('security-agent:cron-sync', 'error'); /** * Vercel Cron Job: Sync Security Alerts @@ -19,10 +21,14 @@ export async function GET(request: NextRequest) { try { const authHeader = request.headers.get('authorization'); if (!CRON_SECRET || authHeader !== `Bearer ${CRON_SECRET}`) { + cronWarn( + 'SECURITY: Invalid CRON job authorization attempt: ' + + (authHeader ? 'Invalid authorization header' : 'Missing authorization header') + ); return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); } - console.log('[sync-security-alerts] Starting security alerts sync...'); + log('Starting security alerts sync...'); const startTime = Date.now(); const result = await runFullSync(); @@ -37,14 +43,16 @@ export async function GET(request: NextRequest) { timestamp: new Date().toISOString(), }; - console.log('[sync-security-alerts] Sync completed:', summary); + log('Sync completed', summary); - // TODO: Send heartbeat to BetterStack on success - // await fetch(BETTERSTACK_HEARTBEAT_URL); + // Send heartbeat to BetterStack on success + if (SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL) { + await fetch(SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL, { signal: AbortSignal.timeout(5000) }).catch(() => {}); + } return NextResponse.json(summary); } catch (error) { - console.error('[sync-security-alerts] Error syncing security alerts:', error); + logError('Error syncing security alerts', { error }); captureException(error, { tags: { endpoint: 'cron/sync-security-alerts' }, extra: { @@ -52,8 +60,10 @@ export async function GET(request: NextRequest) { }, }); - // TODO: Send failure heartbeat to BetterStack - // await fetch(`${BETTERSTACK_HEARTBEAT_URL}/fail`); + // Send failure heartbeat to BetterStack + if (SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL) { + await fetch(`${SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL}/fail`, { signal: AbortSignal.timeout(5000) }).catch(() => {}); + } return NextResponse.json( { diff --git a/src/lib/config.server.ts b/src/lib/config.server.ts index 548148650..da442818d 100644 --- a/src/lib/config.server.ts +++ b/src/lib/config.server.ts @@ -151,3 +151,11 @@ export const CREDIT_CATEGORIES_ENCRYPTION_KEY = getEnvVariable('CREDIT_CATEGORIE // Agent observability ingest service export const O11Y_SERVICE_URL = getEnvVariable('O11Y_SERVICE_URL') || ''; export const O11Y_KILO_GATEWAY_CLIENT_SECRET = getEnvVariable('O11Y_KILO_GATEWAY_CLIENT_SECRET'); + +// Security agent BetterStack heartbeat URLs +export const SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL = getEnvVariable( + 'SECURITY_SYNC_BETTERSTACK_HEARTBEAT_URL' +); +export const SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL = getEnvVariable( + 'SECURITY_CLEANUP_BETTERSTACK_HEARTBEAT_URL' +); diff --git a/src/lib/security-agent/core/types.ts b/src/lib/security-agent/core/types.ts index 699da5455..bd0a6c627 100644 --- a/src/lib/security-agent/core/types.ts +++ b/src/lib/security-agent/core/types.ts @@ -284,6 +284,8 @@ export type SecurityFindingAnalysis = { modelUsed?: string; /** User ID who triggered the analysis (for audit tracking) */ triggeredByUserId?: string; + /** Correlation ID for tracing across triage → sandbox → extraction → auto-dismiss */ + correlationId?: string; }; /** diff --git a/src/lib/security-agent/github/dependabot-api.ts b/src/lib/security-agent/github/dependabot-api.ts index 0bf776b4b..c00fa89b2 100644 --- a/src/lib/security-agent/github/dependabot-api.ts +++ b/src/lib/security-agent/github/dependabot-api.ts @@ -7,6 +7,11 @@ import { Octokit } from '@octokit/rest'; import { generateGitHubInstallationToken } from '@/lib/integrations/platforms/github/adapter'; import type { DependabotAlertRaw, DependabotAlertState } from '../core/types'; +import { sentryLogger } from '@/lib/utils.server'; + +const log = sentryLogger('security-agent:dependabot-api', 'info'); +const warn = sentryLogger('security-agent:dependabot-api', 'warning'); +const logError = sentryLogger('security-agent:dependabot-api', 'error'); /** * Dependabot alert from GitHub API @@ -111,40 +116,61 @@ export async function fetchAllDependabotAlerts( owner: string, repo: string ): Promise { - console.log( - `[dependabot-api] Fetching alerts for ${owner}/${repo} (installationId=${installationId})` - ); + log(`Fetching alerts for ${owner}/${repo}`, { installationId }); + const apiStartTime = performance.now(); const tokenData = await generateGitHubInstallationToken(installationId); const octokit = new Octokit({ auth: tokenData.token }); try { - console.log(`[dependabot-api] Fetching all alerts for ${owner}/${repo} using pagination...`); // Use Octokit's paginate helper which handles cursor-based pagination automatically // The Dependabot API does not support the `page` parameter - const data = await octokit.paginate(octokit.rest.dependabot.listAlertsForRepo, { - owner, - repo, - per_page: 100, - // No state filter - get all alerts including fixed/dismissed - }); + const data = await octokit.paginate( + octokit.rest.dependabot.listAlertsForRepo, + { + owner, + repo, + per_page: 100, + // No state filter - get all alerts including fixed/dismissed + }, + response => { + // Track rate limit on each page response + const remaining = response.headers['x-ratelimit-remaining']; + const limit = response.headers['x-ratelimit-limit']; + if (remaining !== undefined && Number(remaining) < 100) { + warn(`GitHub API rate limit low: ${remaining}/${limit} remaining`, { + repo: `${owner}/${repo}`, + }); + } + return response.data; + } + ); + const apiDurationMs = Math.round(performance.now() - apiStartTime); const alerts = data.map(alert => toInternalAlert(alert as unknown as GitHubDependabotAlert)); - console.log(`[dependabot-api] Total alerts fetched for ${owner}/${repo}: ${alerts.length}`); + + log(`Alerts fetched for ${owner}/${repo}`, { + alertCount: alerts.length, + durationMs: apiDurationMs, + }); + return alerts; } catch (error) { + const apiDurationMs = Math.round(performance.now() - apiStartTime); const status = (error as { status?: number }).status; const message = (error as { message?: string }).message; // Handle case where Dependabot alerts are disabled for the repository if (status === 403 && message?.includes('Dependabot alerts are disabled')) { - console.log(`[dependabot-api] Dependabot alerts are disabled for ${owner}/${repo}, skipping`); + log(`Dependabot alerts are disabled for ${owner}/${repo}, skipping`); return []; } - console.error( - `[dependabot-api] Error fetching alerts for ${owner}/${repo}: status=${status}, message=${message}` - ); + logError(`Error fetching alerts for ${owner}/${repo}`, { + status, + message, + durationMs: apiDurationMs, + }); throw error; } } diff --git a/src/lib/security-agent/services/analysis-service.ts b/src/lib/security-agent/services/analysis-service.ts index c84206ebc..6777fb3dc 100644 --- a/src/lib/security-agent/services/analysis-service.ts +++ b/src/lib/security-agent/services/analysis-service.ts @@ -10,6 +10,7 @@ */ import 'server-only'; +import { randomUUID } from 'crypto'; import { createCloudAgentClient } from '@/lib/cloud-agent/cloud-agent-client'; import { generateApiToken } from '@/lib/tokens'; import { getSecurityFindingById } from '../db/security-findings'; @@ -17,7 +18,7 @@ import { updateAnalysisStatus } from '../db/security-analysis'; import type { SecurityFindingAnalysis, SecurityReviewOwner } from '../core/types'; import type { User, SecurityFinding } from '@/db/schema'; import type { StreamEvent, SystemKilocodeEvent } from '@/components/cloud-agent/types'; -import { captureException } from '@sentry/nextjs'; +import { addBreadcrumb, captureException, startSpan, withScope } from '@sentry/nextjs'; import { db } from '@/lib/drizzle'; import { cliSessions } from '@/db/schema'; import { eq } from 'drizzle-orm'; @@ -25,6 +26,11 @@ import { getBlobContent } from '@/lib/r2/cli-sessions'; import { triageSecurityFinding } from './triage-service'; import { extractSandboxAnalysis } from './extraction-service'; import { maybeAutoDismissAnalysis } from './auto-dismiss-service'; +import { sentryLogger } from '@/lib/utils.server'; + +const log = sentryLogger('security-agent:analysis', 'info'); +const warn = sentryLogger('security-agent:analysis', 'warning'); +const logError = sentryLogger('security-agent:analysis', 'error'); const ANALYSIS_PROMPT_TEMPLATE = `You are a security analyst reviewing a dependency vulnerability alert for a codebase. @@ -130,7 +136,10 @@ function getCliMessageContent(msg: RawCliMessage): string | null { * Fetch the last assistant message from a CLI session's ui_messages blob. * This is the final analysis result that we want to store. */ -async function fetchLastAssistantMessage(cliSessionId: string): Promise { +async function fetchLastAssistantMessage( + cliSessionId: string, + correlationId: string +): Promise { try { // Get the session to find the ui_messages blob URL const [session] = await db @@ -140,19 +149,22 @@ async function fetchLastAssistantMessage(cliSessionId: string): Promise ({ type: m.type, say: m.say, ask: m.ask })), @@ -164,11 +176,11 @@ async function fetchLastAssistantMessage(cliSessionId: string): Promise { if (!rawMarkdown.trim()) { @@ -262,17 +275,20 @@ async function finalizeAnalysis( // ========================================================================= // Tier 3: Extract structured fields from raw markdown // ========================================================================= - console.log('[Security Analysis] Starting Tier 3 extraction:', { findingId }); + log('Starting Tier 3 extraction', { correlationId, findingId }); - const sandboxAnalysis = await extractSandboxAnalysis( + const sandboxAnalysis = await extractSandboxAnalysis({ finding, rawMarkdown, authToken, model, - organizationId - ); + correlationId, + userId, + organizationId, + }); - console.log('[Security Analysis] Extraction complete:', { + log('Extraction complete', { + correlationId, findingId, isExploitable: sandboxAnalysis.isExploitable, usageLocationsCount: sandboxAnalysis.usageLocations.length, @@ -284,19 +300,22 @@ async function finalizeAnalysis( analyzedAt: new Date().toISOString(), modelUsed: model, triggeredByUserId: existingAnalysis?.triggeredByUserId, + correlationId, }; await updateAnalysisStatus(findingId, 'completed', { analysis }); // Attempt auto-dismiss after sandbox analysis if isExploitable === false if (sandboxAnalysis.isExploitable === false) { - void maybeAutoDismissAnalysis(findingId, analysis, owner, userId).catch((error: unknown) => { - console.error('[Security Analysis] Auto-dismiss after sandbox error:', error); - captureException(error, { - tags: { operation: 'maybeAutoDismissAnalysis' }, - extra: { findingId }, - }); - }); + void maybeAutoDismissAnalysis({ findingId, analysis, owner, userId, correlationId }).catch( + (error: unknown) => { + logError('Auto-dismiss after sandbox error', { correlationId, findingId, error }); + captureException(error, { + tags: { operation: 'maybeAutoDismissAnalysis' }, + extra: { findingId, correlationId }, + }); + } + ); } } @@ -318,141 +337,207 @@ async function processAnalysisStream( owner: SecurityReviewOwner, userId: string, authToken: string, + correlationId: string, organizationId?: string ): Promise { let cloudAgentSessionId: string | null = null; let cliSessionId: string | null = null; - - try { - for await (const event of streamGenerator) { - switch (event.streamEventType) { - case 'status': - if (event.sessionId) { - // Only persist sessionId / running transition when it first becomes available. - // Cloud-agent emits multiple status events; avoid unnecessary writes. - if (cloudAgentSessionId !== event.sessionId) { - cloudAgentSessionId = event.sessionId; - await updateAnalysisStatus(findingId, 'running', { sessionId: cloudAgentSessionId }); + const streamStartTime = performance.now(); + + // Wrap in withScope so Sentry tags apply to this background work. + // This is a fire-and-forget function — the parent request scope is already gone. + await withScope(async scope => { + scope.setTag('security_agent.correlation_id', correlationId); + scope.setTag('security_agent.finding_id', findingId); + + await startSpan( + { name: 'security-agent.sandbox-analysis', op: 'ai.pipeline' }, + async span => { + span.setAttribute('security_agent.finding_id', findingId); + span.setAttribute('security_agent.model', model); + span.setAttribute('security_agent.correlation_id', correlationId); + + try { + for await (const event of streamGenerator) { + switch (event.streamEventType) { + case 'status': + if (event.sessionId) { + if (cloudAgentSessionId !== event.sessionId) { + cloudAgentSessionId = event.sessionId; + await updateAnalysisStatus(findingId, 'running', { + sessionId: cloudAgentSessionId, + }); + } + } + break; + + case 'kilocode': { + if (isSessionCreatedEvent(event)) { + const payloadSessionId = event.payload.sessionId; + if (typeof payloadSessionId === 'string') { + cliSessionId = payloadSessionId; + log('Session created', { + correlationId, + findingId, + cloudAgentSessionId, + cliSessionId, + }); + await updateAnalysisStatus(findingId, 'running', { + sessionId: cloudAgentSessionId ?? undefined, + cliSessionId, + }); + } + } + break; + } + + case 'error': + span.setAttribute('security_agent.status', 'failed'); + span.setAttribute( + 'security_agent.duration_ms', + Math.round(performance.now() - streamStartTime) + ); + await updateAnalysisStatus(findingId, 'failed', { + error: event.error || 'Unknown error during analysis', + }); + return; + + case 'complete': { + log('Stream complete, fetching last message', { correlationId, findingId }); + + if (!cliSessionId) { + span.setAttribute('security_agent.status', 'failed'); + await updateAnalysisStatus(findingId, 'failed', { + error: 'Analysis completed but no CLI session ID was captured', + }); + return; + } + + // Wait/retry for session ui_messages to be available in DB/R2. + const maxAttempts = 5; + let delayMs = 1500; + let lastMessage: string | null = null; + const retryStartTime = performance.now(); + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + await new Promise(resolve => setTimeout(resolve, delayMs)); + lastMessage = await fetchLastAssistantMessage(cliSessionId, correlationId); + if (lastMessage) { + const retryDurationMs = Math.round(performance.now() - retryStartTime); + log('R2 fetch succeeded', { + correlationId, + findingId, + attempt, + totalRetryDurationMs: retryDurationMs, + }); + span.setAttribute('security_agent.r2_retry_attempt', attempt); + span.setAttribute('security_agent.r2_retry_duration_ms', retryDurationMs); + break; + } + delayMs = Math.min(delayMs * 2, 15_000); + } + + if (!lastMessage) { + const retryDurationMs = Math.round(performance.now() - retryStartTime); + warn('R2 fetch failed after all attempts', { + correlationId, + findingId, + attempts: maxAttempts, + totalRetryDurationMs: retryDurationMs, + }); + span.setAttribute('security_agent.r2_retry_attempt', maxAttempts); + span.setAttribute('security_agent.r2_retry_exhausted', true); + } + + const streamDurationMs = Math.round(performance.now() - streamStartTime); + span.setAttribute('security_agent.duration_ms', streamDurationMs); + + if (lastMessage) { + span.setAttribute('security_agent.status', 'completed'); + await finalizeAnalysis( + findingId, + lastMessage, + model, + owner, + userId, + authToken, + correlationId, + organizationId + ); + } else { + span.setAttribute('security_agent.status', 'failed'); + await updateAnalysisStatus(findingId, 'failed', { + error: + 'Analysis completed but result was not available (no completion_result found)', + }); + } + return; + } + + case 'interrupted': + span.setAttribute('security_agent.status', 'interrupted'); + span.setAttribute( + 'security_agent.duration_ms', + Math.round(performance.now() - streamStartTime) + ); + await updateAnalysisStatus(findingId, 'failed', { + error: `Analysis interrupted: ${event.reason}`, + }); + return; } } - break; - - case 'kilocode': { - // Check for session_created event to capture CLI session ID - if (isSessionCreatedEvent(event)) { - const payloadSessionId = event.payload.sessionId; - if (typeof payloadSessionId === 'string') { - cliSessionId = payloadSessionId; - console.log('[Security Analysis] Session created:', { + + // Stream ended without explicit completion event + const streamDurationMs = Math.round(performance.now() - streamStartTime); + span.setAttribute('security_agent.duration_ms', streamDurationMs); + warn('Stream ended without complete event', { correlationId, findingId }); + + if (cliSessionId) { + await new Promise(resolve => setTimeout(resolve, 2000)); + const lastMessage = await fetchLastAssistantMessage(cliSessionId, correlationId); + if (lastMessage) { + span.setAttribute('security_agent.status', 'completed'); + await finalizeAnalysis( findingId, - cloudAgentSessionId, - cliSessionId, - }); - // Update the finding with the CLI session ID for linking - await updateAnalysisStatus(findingId, 'running', { - sessionId: cloudAgentSessionId ?? undefined, - cliSessionId, - }); + lastMessage, + model, + owner, + userId, + authToken, + correlationId, + organizationId + ); + return; } } - break; - } - case 'error': + span.setAttribute('security_agent.status', 'failed'); await updateAnalysisStatus(findingId, 'failed', { - error: event.error || 'Unknown error during analysis', + error: 'Analysis stream ended without completion', }); - return; - - case 'complete': { - // Stream completed - fetch the last message from the session - console.log('[Security Analysis] Stream complete, fetching last message:', { + } catch (error) { + // Catch inside startSpan so span attributes are still available (#7) + const streamDurationMs = Math.round(performance.now() - streamStartTime); + span.setAttribute('security_agent.status', 'error'); + span.setAttribute('security_agent.duration_ms', streamDurationMs); + + logError('processAnalysisStream failed', { + correlationId, findingId, - cliSessionId, + durationMs: streamDurationMs, + error, }); - - if (!cliSessionId) { - await updateAnalysisStatus(findingId, 'failed', { - error: 'Analysis completed but no CLI session ID was captured', - }); - return; - } - - // Wait/retry for session ui_messages to be available in DB/R2. - // R2 writes can lag behind the stream completion event. - const maxAttempts = 5; - let delayMs = 1500; - let lastMessage: string | null = null; - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - await new Promise(resolve => setTimeout(resolve, delayMs)); - lastMessage = await fetchLastAssistantMessage(cliSessionId); - if (lastMessage) break; - delayMs = Math.min(delayMs * 2, 15_000); - } - - if (lastMessage) { - await finalizeAnalysis( - findingId, - lastMessage, - model, - owner, - userId, - authToken, - organizationId - ); - } else { - await updateAnalysisStatus(findingId, 'failed', { - error: 'Analysis completed but result was not available (no completion_result found)', - }); - } - return; - } - - case 'interrupted': await updateAnalysisStatus(findingId, 'failed', { - error: `Analysis interrupted: ${event.reason}`, + error: error instanceof Error ? error.message : String(error), }); - return; - } - } - - // Stream ended without explicit completion event - console.log('[Security Analysis] Stream ended without complete event:', { - findingId, - cliSessionId, - }); - - if (cliSessionId) { - // Try to fetch the last message anyway - await new Promise(resolve => setTimeout(resolve, 2000)); - const lastMessage = await fetchLastAssistantMessage(cliSessionId); - if (lastMessage) { - await finalizeAnalysis( - findingId, - lastMessage, - model, - owner, - userId, - authToken, - organizationId - ); - return; + captureException(error, { + tags: { operation: 'processAnalysisStream' }, + extra: { findingId, cloudAgentSessionId, cliSessionId, correlationId }, + }); + } } - } - - await updateAnalysisStatus(findingId, 'failed', { - error: 'Analysis stream ended without completion', - }); - } catch (error) { - await updateAnalysisStatus(findingId, 'failed', { - error: error instanceof Error ? error.message : String(error), - }); - captureException(error, { - tags: { operation: 'processAnalysisStream' }, - extra: { findingId, cloudAgentSessionId, cliSessionId }, - }); - } + ); + }); } /** @@ -461,14 +546,6 @@ async function processAnalysisStream( * Tier 1 (Quick Triage): Always runs first. Direct LLM call to analyze metadata. * Tier 2 (Sandbox Analysis): Only runs if triage says it's needed OR forceSandbox is true. * Tier 3 (Structured Extraction): Extracts structured fields from raw markdown output. - * - * @param params.findingId - The security finding to analyze - * @param params.user - The user initiating the analysis - * @param params.githubRepo - The GitHub repository (owner/repo format) - * @param params.githubToken - Optional GitHub token for private repos - * @param params.model - Model to use for analysis - * @param params.forceSandbox - Skip triage decision and always run sandbox analysis - * @param params.organizationId - Optional organization ID (for org-level analysis) */ export async function startSecurityAnalysis(params: { findingId: string; @@ -489,6 +566,8 @@ export async function startSecurityAnalysis(params: { organizationId, } = params; + const correlationId = randomUUID(); + // Get the finding const finding = await getSecurityFindingById(findingId); if (!finding) { @@ -510,17 +589,40 @@ export async function startSecurityAnalysis(params: { // ========================================================================= // Tier 1: Quick Triage (always runs) // ========================================================================= - console.log('[Security Analysis] Starting Tier 1 triage:', { findingId, model }); + log('Starting Tier 1 triage', { correlationId, findingId, model }); - const triage = await triageSecurityFinding(finding, authToken, model, organizationId); + const tier1Start = performance.now(); + const triage = await triageSecurityFinding({ + finding, + authToken, + model, + correlationId, + userId: user.id, + organizationId, + }); + const tier1DurationMs = Math.round(performance.now() - tier1Start); - console.log('[Security Analysis] Triage complete:', { + log('Triage complete', { + correlationId, findingId, - needsSandboxAnalysis: triage.needsSandboxAnalysis, - needsSandboxReasoning: triage.needsSandboxReasoning, + durationMs: tier1DurationMs, suggestedAction: triage.suggestedAction, confidence: triage.confidence, - forceSandbox, + needsSandboxAnalysis: triage.needsSandboxAnalysis, + }); + + addBreadcrumb({ + category: 'security-agent.triage', + message: `Triage outcome: ${triage.suggestedAction}`, + level: 'info', + data: { + correlationId, + findingId, + suggestedAction: triage.suggestedAction, + confidence: triage.confidence, + needsSandbox: triage.needsSandboxAnalysis, + durationMs: tier1DurationMs, + }, }); // Decide whether to run sandbox analysis @@ -530,27 +632,33 @@ export async function startSecurityAnalysis(params: { // ========================================================================= // Triage-only: Save result and potentially auto-dismiss // ========================================================================= - console.log('[Security Analysis] Triage-only completion:', { findingId }); + log('Triage-only completion', { correlationId, findingId }); const analysis: SecurityFindingAnalysis = { triage, - // No sandboxAnalysis - triage determined sandbox not needed analyzedAt: new Date().toISOString(), modelUsed: model, triggeredByUserId: user.id, + correlationId, }; await updateAnalysisStatus(findingId, 'completed', { analysis }); - // Attempt auto-dismiss if configured (off by default) - const owner: SecurityReviewOwner = organizationId ? { organizationId } : { userId: user.id }; - - // Run auto-dismiss in background (don't block response) - void maybeAutoDismissAnalysis(findingId, analysis, owner, user.id).catch((error: unknown) => { - console.error('[Security Analysis] Auto-dismiss error:', error); + const owner: SecurityReviewOwner = organizationId + ? { organizationId } + : { userId: user.id }; + + void maybeAutoDismissAnalysis({ + findingId, + analysis, + owner, + userId: user.id, + correlationId, + }).catch((error: unknown) => { + logError('Auto-dismiss error', { correlationId, findingId, error }); captureException(error, { tags: { operation: 'maybeAutoDismissAnalysis' }, - extra: { findingId }, + extra: { findingId, correlationId }, }); }); @@ -560,39 +668,33 @@ export async function startSecurityAnalysis(params: { // ========================================================================= // Tier 2: Sandbox Analysis (cloud agent) // ========================================================================= - console.log('[Security Analysis] Starting Tier 2 sandbox analysis:', { findingId }); + log('Starting Tier 2 sandbox analysis', { correlationId, findingId }); - // Save triage first, then start sandbox const partialAnalysis: SecurityFindingAnalysis = { triage, analyzedAt: new Date().toISOString(), modelUsed: model, triggeredByUserId: user.id, + correlationId, }; await updateAnalysisStatus(findingId, 'pending', { analysis: partialAnalysis }); - // Build the analysis prompt const prompt = buildAnalysisPrompt(finding); - - // Create cloud agent client const client = createCloudAgentClient(authToken); - console.log('[Security Analysis] Starting cloud agent:', { findingId }); - - // Start the session stream (no MCP - Tier 3 extraction handles structured output) const streamGenerator = client.initiateSessionStream({ githubRepo, githubToken, prompt, - mode: 'code', // Use code mode so agent can search files + mode: 'code', model, }); - // Build owner for auto-dismiss context - const owner: SecurityReviewOwner = organizationId ? { organizationId } : { userId: user.id }; + const owner: SecurityReviewOwner = organizationId + ? { organizationId } + : { userId: user.id }; - // Process stream in background (don't await) - // Using void to explicitly ignore the promise + // Fire-and-forget: processAnalysisStream manages its own Sentry scope (#2) void processAnalysisStream( findingId, streamGenerator, @@ -600,6 +702,7 @@ export async function startSecurityAnalysis(params: { owner, user.id, authToken, + correlationId, organizationId ); @@ -610,7 +713,7 @@ export async function startSecurityAnalysis(params: { }); captureException(error, { tags: { operation: 'startSecurityAnalysis' }, - extra: { findingId, githubRepo }, + extra: { findingId, githubRepo, correlationId }, }); return { started: false, error: error instanceof Error ? error.message : String(error) }; } diff --git a/src/lib/security-agent/services/auto-dismiss-service.ts b/src/lib/security-agent/services/auto-dismiss-service.ts index 88d1dc28b..a1ca1aa43 100644 --- a/src/lib/security-agent/services/auto-dismiss-service.ts +++ b/src/lib/security-agent/services/auto-dismiss-service.ts @@ -18,6 +18,10 @@ import { updateSecurityFindingStatus } from '../db/security-findings'; import { getSecurityAgentConfig } from '../db/security-config'; import type { Owner } from '@/lib/code-reviews/core'; import type { SecurityFindingAnalysis, SecurityReviewOwner } from '../core/types'; +import { sentryLogger } from '@/lib/utils.server'; + +const log = sentryLogger('security-agent:auto-dismiss', 'info'); +const logError = sentryLogger('security-agent:auto-dismiss', 'error'); /** * Convert SecurityReviewOwner + userId to Owner format for config lookups. @@ -60,21 +64,24 @@ type AutoDismissSource = 'triage' | 'sandbox'; * Only runs if auto-dismiss is enabled in config. * * Priority: - * 1. If sandboxAnalysis exists and isExploitable === false → dismiss (no confidence threshold) - * 2. If triage.suggestedAction === 'dismiss' → dismiss (with confidence threshold) + * 1. If sandboxAnalysis exists and isExploitable === false -> dismiss (no confidence threshold) + * 2. If triage.suggestedAction === 'dismiss' -> dismiss (with confidence threshold) * - * @param findingId - The ID of the finding to potentially dismiss - * @param analysis - The full analysis result (triage + optional sandbox) - * @param owner - The security review owner (org or user) - * @param userId - The user performing the action (for audit/permissions) + * @param options.findingId - The ID of the finding to potentially dismiss + * @param options.analysis - The full analysis result (triage + optional sandbox) + * @param options.owner - The security review owner (org or user) + * @param options.userId - The user performing the action (for audit/permissions) + * @param options.correlationId - Correlation ID for tracing across the analysis pipeline * @returns Object with dismissed status and source */ -export async function maybeAutoDismissAnalysis( - findingId: string, - analysis: SecurityFindingAnalysis, - owner: SecurityReviewOwner, - userId: string -): Promise<{ dismissed: boolean; source?: AutoDismissSource }> { +export async function maybeAutoDismissAnalysis(options: { + findingId: string; + analysis: SecurityFindingAnalysis; + owner: SecurityReviewOwner; + userId: string; + correlationId?: string; +}): Promise<{ dismissed: boolean; source?: AutoDismissSource }> { + const { findingId, analysis, owner, userId, correlationId = '' } = options; const ownerConverted = toOwner(owner, userId); const config = await getSecurityAgentConfig(ownerConverted); @@ -91,9 +98,9 @@ export async function maybeAutoDismissAnalysis( dismissedBy: 'auto-sandbox', }); - console.log('[AutoDismiss] Auto-dismissed finding (sandbox):', { + log('Auto-dismissed finding (sandbox)', { + correlationId, findingId, - isExploitable: analysis.sandboxAnalysis.isExploitable, reasoning: analysis.sandboxAnalysis.exploitabilityReasoning.slice(0, 100), }); @@ -118,7 +125,8 @@ export async function maybeAutoDismissAnalysis( dismissedBy: 'auto-triage', }); - console.log('[AutoDismiss] Auto-dismissed finding (triage):', { + log('Auto-dismissed finding (triage)', { + correlationId, findingId, confidence: triage.confidence, reasoning: triage.needsSandboxReasoning.slice(0, 100), @@ -215,7 +223,7 @@ export async function autoDismissEligibleFindings( }); dismissed++; } catch (error) { - console.error('[AutoDismiss] Error dismissing finding:', finding.id, error); + logError('Error dismissing finding', { findingId: finding.id, error }); captureException(error, { tags: { operation: 'autoDismissEligibleFindings' }, extra: { findingId: finding.id }, @@ -224,7 +232,7 @@ export async function autoDismissEligibleFindings( } } - console.log('[AutoDismiss] Bulk auto-dismiss complete:', { dismissed, skipped, errors }); + log('Bulk auto-dismiss complete', { dismissed, skipped, errors }); return { dismissed, skipped, errors }; } diff --git a/src/lib/security-agent/services/extraction-service.ts b/src/lib/security-agent/services/extraction-service.ts index 28315fe16..7d4cade42 100644 --- a/src/lib/security-agent/services/extraction-service.ts +++ b/src/lib/security-agent/services/extraction-service.ts @@ -13,7 +13,10 @@ import type OpenAI from 'openai'; import { sendProxiedChatCompletion } from '@/lib/llm-proxy-helpers'; import type { SecurityFinding } from '@/db/schema'; import type { SecurityFindingSandboxAnalysis, SandboxSuggestedAction } from '../core/types'; -import { captureException } from '@sentry/nextjs'; +import { addBreadcrumb, captureException, startSpan } from '@sentry/nextjs'; +import { sentryLogger } from '@/lib/utils.server'; +import { emitApiMetrics } from '@/lib/o11y/api-metrics.server'; +import { O11Y_KILO_GATEWAY_CLIENT_SECRET } from '@/lib/config.server'; const VALID_SUGGESTED_ACTIONS: SandboxSuggestedAction[] = [ 'dismiss', @@ -22,6 +25,9 @@ const VALID_SUGGESTED_ACTIONS: SandboxSuggestedAction[] = [ 'monitor', ]; +const log = sentryLogger('security-agent:extraction', 'info'); +const logError = sentryLogger('security-agent:extraction', 'error'); + // Version string for API requests - must be >= 4.69.1 to pass LLM proxy version check const EXTRACTION_SERVICE_VERSION = '5.0.0'; const EXTRACTION_SERVICE_USER_AGENT = `Kilo-Security-Extraction/${EXTRACTION_SERVICE_VERSION}`; @@ -157,42 +163,33 @@ function parseExtractionResult( try { const parsed = JSON.parse(args); - // Validate isExploitable if (typeof parsed.isExploitable !== 'boolean' && parsed.isExploitable !== 'unknown') { - console.error('[Extraction] Invalid isExploitable:', parsed.isExploitable); + logError('Invalid isExploitable', { value: parsed.isExploitable }); return null; } - // Validate exploitabilityReasoning if (typeof parsed.exploitabilityReasoning !== 'string') { - console.error( - '[Extraction] Invalid exploitabilityReasoning:', - parsed.exploitabilityReasoning - ); + logError('Invalid exploitabilityReasoning', { value: parsed.exploitabilityReasoning }); return null; } - // Validate usageLocations if (!Array.isArray(parsed.usageLocations)) { - console.error('[Extraction] Invalid usageLocations:', parsed.usageLocations); + logError('Invalid usageLocations', { value: parsed.usageLocations }); return null; } - // Validate suggestedFix if (typeof parsed.suggestedFix !== 'string') { - console.error('[Extraction] Invalid suggestedFix:', parsed.suggestedFix); + logError('Invalid suggestedFix', { value: parsed.suggestedFix }); return null; } - // Validate suggestedAction if (!VALID_SUGGESTED_ACTIONS.includes(parsed.suggestedAction)) { - console.error('[Extraction] Invalid suggestedAction:', parsed.suggestedAction); + logError('Invalid suggestedAction', { value: parsed.suggestedAction }); return null; } - // Validate summary if (typeof parsed.summary !== 'string') { - console.error('[Extraction] Invalid summary:', parsed.summary); + logError('Invalid summary', { value: parsed.summary }); return null; } @@ -207,7 +204,7 @@ function parseExtractionResult( analysisAt: new Date().toISOString(), }; } catch (error) { - console.error('[Extraction] Failed to parse tool arguments:', error); + logError('Failed to parse tool arguments', { error }); return null; } } @@ -235,20 +232,33 @@ function createFallbackExtraction( * Extract structured analysis fields from raw markdown output. * Uses direct LLM call with function calling to parse the unstructured analysis. * - * @param finding - The security finding being analyzed - * @param rawMarkdown - Raw markdown output from sandbox analysis - * @param authToken - Auth token for the LLM proxy - * @param model - Model to use for extraction (default: anthropic/claude-sonnet-4) - * @param organizationId - Optional organization ID for usage tracking + * @param options.finding - The security finding being analyzed + * @param options.rawMarkdown - Raw markdown output from sandbox analysis + * @param options.authToken - Auth token for the LLM proxy + * @param options.model - Model to use for extraction (default: anthropic/claude-sonnet-4) + * @param options.correlationId - Correlation ID for tracing across the analysis pipeline + * @param options.userId - User ID for metrics tracking + * @param options.organizationId - Optional organization ID for usage tracking */ -export async function extractSandboxAnalysis( - finding: SecurityFinding, - rawMarkdown: string, - authToken: string, - model: string = 'anthropic/claude-sonnet-4', - organizationId?: string -): Promise { - console.log('[Extraction] Starting extraction for finding:', finding.id); +export async function extractSandboxAnalysis(options: { + finding: SecurityFinding; + rawMarkdown: string; + authToken: string; + model?: string; + correlationId?: string; + userId?: string; + organizationId?: string; +}): Promise { + const { + finding, + rawMarkdown, + authToken, + model = 'anthropic/claude-sonnet-4', + correlationId = '', + userId = '', + organizationId, + } = options; + log('Starting extraction', { correlationId, findingId: finding.id }); const messages: ChatMessage[] = [ { @@ -262,70 +272,181 @@ export async function extractSandboxAnalysis( ]; try { - const result = await sendProxiedChatCompletion({ - authToken, - version: EXTRACTION_SERVICE_VERSION, - userAgent: EXTRACTION_SERVICE_USER_AGENT, - body: { - model, - messages, - tools: [SUBMIT_EXTRACTION_TOOL], - tool_choice: { - type: 'function', - function: { name: 'submit_analysis_extraction' }, - }, - }, - organizationId, - }); - - if (!result.ok) { - console.error('[Extraction] API error:', result.status, result.error); - captureException(new Error(`Extraction API error: ${result.status}`), { - tags: { operation: 'extractSandboxAnalysis' }, - extra: { findingId: finding.id, status: result.status, error: result.error }, - }); - return createFallbackExtraction(rawMarkdown, `API error: ${result.status}`); - } - - const choice = result.data.choices?.[0]; - if (!choice) { - console.error('[Extraction] No choice in response'); - return createFallbackExtraction(rawMarkdown, 'No response from LLM'); - } - - const message = choice.message; - const toolCall = message.tool_calls?.[0]; - - if (!toolCall || toolCall.type !== 'function') { - console.error('[Extraction] No tool call in response'); - return createFallbackExtraction(rawMarkdown, 'LLM did not call the extraction tool'); - } - - if (toolCall.function.name !== 'submit_analysis_extraction') { - console.error('[Extraction] Unexpected tool call:', toolCall.function.name); - return createFallbackExtraction(rawMarkdown, `Unexpected tool: ${toolCall.function.name}`); - } - - const extractionResult = parseExtractionResult(toolCall.function.arguments, rawMarkdown); - if (!extractionResult) { - return createFallbackExtraction(rawMarkdown, 'Failed to parse extraction result'); - } - - console.log('[Extraction] Extraction complete:', { - findingId: finding.id, - isExploitable: extractionResult.isExploitable, - usageLocationsCount: extractionResult.usageLocations.length, - summaryLength: extractionResult.summary.length, - }); + const extractionResult = await startSpan( + { name: 'security-agent.extraction', op: 'ai.inference' }, + async span => { + span.setAttribute('security_agent.model', model); + span.setAttribute('security_agent.finding_id', finding.id); + span.setAttribute('security_agent.correlation_id', correlationId); + + const llmStart = performance.now(); + + const result = await sendProxiedChatCompletion({ + authToken, + version: EXTRACTION_SERVICE_VERSION, + userAgent: EXTRACTION_SERVICE_USER_AGENT, + body: { + model, + messages, + tools: [SUBMIT_EXTRACTION_TOOL], + tool_choice: { + type: 'function', + function: { name: 'submit_analysis_extraction' }, + }, + }, + organizationId, + }); + + const durationMs = Math.round(performance.now() - llmStart); + span.setAttribute('security_agent.duration_ms', durationMs); + + if (!result.ok) { + logError('Extraction API error', { + correlationId, + findingId: finding.id, + status: result.status, + }); + captureException(new Error(`Extraction API error: ${result.status}`), { + tags: { operation: 'extractSandboxAnalysis' }, + extra: { + findingId: finding.id, + status: result.status, + error: result.error, + correlationId, + }, + }); + + span.setAttribute('security_agent.status', 'error'); + span.setAttribute('security_agent.is_fallback', true); + + addBreadcrumb({ + category: 'security-agent.extraction', + message: 'Extraction fallback used', + level: 'warning', + data: { correlationId, findingId: finding.id, isFallback: true }, + }); + + return createFallbackExtraction(rawMarkdown, `API error: ${result.status}`); + } + + // Set token usage on span + const usage = result.data.usage; + if (usage) { + span.setAttribute('security_agent.input_tokens', usage.prompt_tokens); + span.setAttribute('security_agent.output_tokens', usage.completion_tokens); + } + + // Emit API metrics (only if o11y client secret is configured) + if (usage && userId && O11Y_KILO_GATEWAY_CLIENT_SECRET) { + const responseToolCalls = result.data.choices?.[0]?.message?.tool_calls ?? []; + const toolsUsed = responseToolCalls + .filter(tc => tc.type === 'function') + .map(tc => `function:${tc.function.name}`); + + emitApiMetrics({ + clientSecret: O11Y_KILO_GATEWAY_CLIENT_SECRET, + kiloUserId: userId, + organizationId, + isAnonymous: false, + isStreaming: false, + userByok: false, + mode: 'security-agent-extraction', + provider: 'anthropic', + requestedModel: model, + resolvedModel: model, + toolsAvailable: ['function:submit_analysis_extraction'], + toolsUsed, + ttfbMs: durationMs, + completeRequestMs: durationMs, + statusCode: 200, + tokens: { + inputTokens: usage.prompt_tokens, + outputTokens: usage.completion_tokens, + totalTokens: usage.total_tokens, + }, + }); + } + + const choice = result.data.choices?.[0]; + if (!choice) { + logError('No choice in response', { correlationId, findingId: finding.id }); + span.setAttribute('security_agent.is_fallback', true); + return createFallbackExtraction(rawMarkdown, 'No response from LLM'); + } + + const message = choice.message; + const toolCall = message.tool_calls?.[0]; + + if (!toolCall || toolCall.type !== 'function') { + logError('No tool call in response', { correlationId, findingId: finding.id }); + span.setAttribute('security_agent.is_fallback', true); + return createFallbackExtraction(rawMarkdown, 'LLM did not call the extraction tool'); + } + + if (toolCall.function.name !== 'submit_analysis_extraction') { + logError('Unexpected tool call', { correlationId, findingId: finding.id, tool: toolCall.function.name }); + span.setAttribute('security_agent.is_fallback', true); + return createFallbackExtraction( + rawMarkdown, + `Unexpected tool: ${toolCall.function.name}` + ); + } + + const parsed = parseExtractionResult(toolCall.function.arguments, rawMarkdown); + if (!parsed) { + span.setAttribute('security_agent.is_fallback', true); + return createFallbackExtraction(rawMarkdown, 'Failed to parse extraction result'); + } + + log('Extraction complete', { + correlationId, + findingId: finding.id, + isExploitable: parsed.isExploitable, + usageLocationsCount: parsed.usageLocations.length, + }); + + span.setAttribute('security_agent.status', 'success'); + span.setAttribute('security_agent.is_exploitable', String(parsed.isExploitable)); + span.setAttribute('security_agent.suggested_action', parsed.suggestedAction); + span.setAttribute('security_agent.is_fallback', false); + + addBreadcrumb({ + category: 'security-agent.extraction', + message: `Extraction outcome: isExploitable=${parsed.isExploitable}`, + level: 'info', + data: { + correlationId, + findingId: finding.id, + isExploitable: parsed.isExploitable, + suggestedAction: parsed.suggestedAction, + isFallback: false, + }, + }); + + return parsed; + } + ); return extractionResult; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); - console.error('[Extraction] Error during extraction:', errorMessage); + logError('Error during extraction', { + correlationId, + findingId: finding.id, + error: errorMessage, + }); captureException(error, { tags: { operation: 'extractSandboxAnalysis' }, - extra: { findingId: finding.id }, + extra: { findingId: finding.id, correlationId }, }); + + addBreadcrumb({ + category: 'security-agent.extraction', + message: 'Extraction fallback used', + level: 'warning', + data: { correlationId, findingId: finding.id, isFallback: true }, + }); + return createFallbackExtraction(rawMarkdown, errorMessage); } } diff --git a/src/lib/security-agent/services/sync-service.ts b/src/lib/security-agent/services/sync-service.ts index 6cc0bef56..367ca4e36 100644 --- a/src/lib/security-agent/services/sync-service.ts +++ b/src/lib/security-agent/services/sync-service.ts @@ -20,6 +20,10 @@ import { type SyncResult, } from '../core/types'; import type { Owner } from '@/lib/code-reviews/core'; +import { sentryLogger } from '@/lib/utils.server'; + +const log = sentryLogger('security-agent:sync', 'info'); +const logError = sentryLogger('security-agent:sync', 'error'); /** * Convert SecurityReviewOwner to Owner type used by agent_configs @@ -45,10 +49,9 @@ export async function syncDependabotAlertsForRepo(params: { repoFullName: string; }): Promise { const { owner, platformIntegrationId, installationId, repoFullName } = params; + const repoStartTime = performance.now(); - console.log( - `[sync-service] Starting sync for ${repoFullName} (installationId=${installationId})` - ); + log(`Starting sync for ${repoFullName}`, { installationId }); const result: SyncResult = { synced: 0, @@ -65,13 +68,12 @@ export async function syncDependabotAlertsForRepo(params: { } // Fetch all alerts from Dependabot - console.log(`[sync-service] Fetching Dependabot alerts for ${repoFullName}...`); const alerts = await fetchAllDependabotAlerts(installationId, repoOwner, repoName); - console.log(`[sync-service] Fetched ${alerts.length} alerts from GitHub for ${repoFullName}`); + log(`Fetched ${alerts.length} alerts from GitHub for ${repoFullName}`); // Parse alerts to our internal format const findings = parseDependabotAlerts(alerts, repoFullName); - console.log(`[sync-service] Parsed ${findings.length} findings for ${repoFullName}`); + log(`Parsed ${findings.length} findings for ${repoFullName}`); // Get SLA config for this owner const config = await getSecurityAgentConfig(toAgentConfigOwner(owner)); @@ -93,7 +95,7 @@ export async function syncDependabotAlertsForRepo(params: { result.synced++; } catch (error) { result.errors++; - console.error(`[sync-service] Error upserting finding for ${repoFullName}:`, error); + logError(`Error upserting finding for ${repoFullName}`, { error, alertNumber: finding.source_id }); captureException(error, { tags: { operation: 'syncDependabotAlertsForRepo', step: 'upsertFinding' }, extra: { repoFullName, alertNumber: finding.source_id }, @@ -101,13 +103,18 @@ export async function syncDependabotAlertsForRepo(params: { } } - console.log( - `[sync-service] Synced ${result.synced} alerts for ${repoFullName} (${result.errors} errors)` - ); + const repoDurationMs = Math.round(performance.now() - repoStartTime); + log(`Repo sync complete`, { + repo: repoFullName, + durationMs: repoDurationMs, + alertsSynced: result.synced, + errors: result.errors, + }); return result; } catch (error) { - console.error(`[sync-service] Error syncing ${repoFullName}:`, error); + const repoDurationMs = Math.round(performance.now() - repoStartTime); + logError(`Error syncing ${repoFullName}`, { durationMs: repoDurationMs, error }); captureException(error, { tags: { operation: 'syncDependabotAlertsForRepo' }, extra: { repoFullName }, @@ -155,7 +162,7 @@ export async function syncAllReposForOwner(params: { successfulRepos++; } catch (error) { totalResult.errors++; - console.error(`[sync-service] Failed to sync ${repoFullName}:`, error); + logError(`Failed to sync ${repoFullName}`, { error }); if (!firstError && error instanceof Error) { firstError = error; } @@ -201,7 +208,7 @@ export async function getEnabledSecurityReviewConfigs(): Promise< const userId = config.owned_by_user_id; if (!orgId && !userId) { - console.log(`[sync-service] Config ${config.id} has no owner, skipping`); + log(`Config ${config.id} has no owner, skipping`); continue; } @@ -223,15 +230,13 @@ export async function getEnabledSecurityReviewConfigs(): Promise< .limit(1); if (!integration || !integration.platform_installation_id) { - console.log(`[sync-service] No GitHub integration found for config ${config.id}, skipping`); + log(`No GitHub integration found for config ${config.id}, skipping`); continue; } // Check if integration has required permissions if (!hasSecurityReviewPermissions(integration)) { - console.log( - `[sync-service] Integration ${integration.id} missing vulnerability_alerts permission, skipping` - ); + log(`Integration ${integration.id} missing vulnerability_alerts permission, skipping`); continue; } @@ -242,9 +247,7 @@ export async function getEnabledSecurityReviewConfigs(): Promise< ); if (allRepositories.length === 0) { - console.log( - `[sync-service] No repositories found for integration ${integration.id}, skipping` - ); + log(`No repositories found for integration ${integration.id}, skipping`); continue; } @@ -264,20 +267,13 @@ export async function getEnabledSecurityReviewConfigs(): Promise< // Only sync selected repositories const selectedIds = new Set(securityConfig.selected_repository_ids); selectedRepos = allRepositories.filter(r => selectedIds.has(r.id)).map(r => r.full_name); - - console.log( - `[sync-service] Config ${config.id} has 'selected' mode with ${selectedRepos.length} repos (from ${allRepositories.length} total)` - ); } else { // Sync all repositories selectedRepos = allRepositories.map(r => r.full_name); - console.log( - `[sync-service] Config ${config.id} has 'all' mode with ${selectedRepos.length} repos` - ); } if (selectedRepos.length === 0) { - console.log(`[sync-service] No selected repositories for config ${config.id}, skipping`); + log(`No selected repositories for config ${config.id}, skipping`); continue; } @@ -305,11 +301,11 @@ export async function runFullSync(): Promise<{ totalErrors: number; configsProcessed: number; }> { - console.log('[sync-service] Starting full security alerts sync...'); - const startTime = Date.now(); + log('Starting full security alerts sync...'); + const startTime = performance.now(); const configs = await getEnabledSecurityReviewConfigs(); - console.log(`[sync-service] Found ${configs.length} enabled configurations`); + log(`Found ${configs.length} enabled configurations`); let totalSynced = 0; let totalErrors = 0; @@ -328,9 +324,9 @@ export async function runFullSync(): Promise<{ } } - const duration = Date.now() - startTime; - console.log( - `[sync-service] Full sync completed in ${duration}ms: ${totalSynced} alerts synced, ${totalErrors} errors, ${configs.length} configs processed` + const duration = Math.round(performance.now() - startTime); + log( + `Full sync completed in ${duration}ms: ${totalSynced} alerts synced, ${totalErrors} errors, ${configs.length} configs processed` ); return { diff --git a/src/lib/security-agent/services/triage-service.ts b/src/lib/security-agent/services/triage-service.ts index 758db6a22..a4699951c 100644 --- a/src/lib/security-agent/services/triage-service.ts +++ b/src/lib/security-agent/services/triage-service.ts @@ -12,7 +12,13 @@ import type OpenAI from 'openai'; import { sendProxiedChatCompletion } from '@/lib/llm-proxy-helpers'; import type { SecurityFinding } from '@/db/schema'; import type { SecurityFindingTriage } from '../core/types'; -import { captureException } from '@sentry/nextjs'; +import { addBreadcrumb, captureException, startSpan } from '@sentry/nextjs'; +import { sentryLogger } from '@/lib/utils.server'; +import { emitApiMetrics } from '@/lib/o11y/api-metrics.server'; +import { O11Y_KILO_GATEWAY_CLIENT_SECRET } from '@/lib/config.server'; + +const log = sentryLogger('security-agent:triage', 'info'); +const logError = sentryLogger('security-agent:triage', 'error'); // Version string for API requests const TRIAGE_SERVICE_VERSION = '5.0.0'; @@ -131,24 +137,24 @@ function parseTriageResult(args: string): SecurityFindingTriage | null { // Validate required fields if (typeof parsed.needsSandboxAnalysis !== 'boolean') { - console.error('[Triage] Invalid needsSandboxAnalysis:', parsed.needsSandboxAnalysis); + logError('Invalid needsSandboxAnalysis', { value: parsed.needsSandboxAnalysis }); return null; } if (typeof parsed.needsSandboxReasoning !== 'string') { - console.error('[Triage] Invalid needsSandboxReasoning:', parsed.needsSandboxReasoning); + logError('Invalid needsSandboxReasoning', { value: parsed.needsSandboxReasoning }); return null; } const validActions = ['dismiss', 'analyze_codebase', 'manual_review']; if (!validActions.includes(parsed.suggestedAction)) { - console.error('[Triage] Invalid suggestedAction:', parsed.suggestedAction); + logError('Invalid suggestedAction', { value: parsed.suggestedAction }); return null; } const validConfidences = ['high', 'medium', 'low']; if (!validConfidences.includes(parsed.confidence)) { - console.error('[Triage] Invalid confidence:', parsed.confidence); + logError('Invalid confidence', { value: parsed.confidence }); return null; } @@ -160,7 +166,7 @@ function parseTriageResult(args: string): SecurityFindingTriage | null { triageAt: new Date().toISOString(), }; } catch (error) { - console.error('[Triage] Failed to parse tool arguments:', error); + logError('Failed to parse tool arguments', { error }); return null; } } @@ -182,18 +188,30 @@ function createFallbackTriage(reason: string): SecurityFindingTriage { * Triage a security finding using direct LLM call with function calling. * Returns a triage result that can be stored in the analysis field. * - * @param finding - The security finding to triage - * @param authToken - Auth token for the LLM proxy - * @param model - Model to use for triage (default: anthropic/claude-sonnet-4) - * @param organizationId - Optional organization ID for usage tracking + * @param options.finding - The security finding to triage + * @param options.authToken - Auth token for the LLM proxy + * @param options.model - Model to use for triage (default: anthropic/claude-sonnet-4) + * @param options.correlationId - Correlation ID for tracing across the analysis pipeline + * @param options.userId - User ID for metrics tracking + * @param options.organizationId - Optional organization ID for usage tracking */ -export async function triageSecurityFinding( - finding: SecurityFinding, - authToken: string, - model: string = 'anthropic/claude-sonnet-4', - organizationId?: string -): Promise { - console.log('[Triage] Starting triage for finding:', finding.id); +export async function triageSecurityFinding(options: { + finding: SecurityFinding; + authToken: string; + model?: string; + correlationId?: string; + userId?: string; + organizationId?: string; +}): Promise { + const { + finding, + authToken, + model = 'anthropic/claude-sonnet-4', + correlationId = '', + userId = '', + organizationId, + } = options; + log('Starting triage', { correlationId, findingId: finding.id }); const messages: ChatMessage[] = [ { @@ -207,71 +225,162 @@ export async function triageSecurityFinding( ]; try { - const result = await sendProxiedChatCompletion({ - authToken, - version: TRIAGE_SERVICE_VERSION, - userAgent: TRIAGE_SERVICE_USER_AGENT, - body: { - model, - messages, - tools: [SUBMIT_TRIAGE_TOOL], - tool_choice: { - type: 'function', - function: { name: 'submit_triage_result' }, - }, - }, - organizationId, - }); - - if (!result.ok) { - console.error('[Triage] API error:', result.status, result.error); - captureException(new Error(`Triage API error: ${result.status}`), { - tags: { operation: 'triageSecurityFinding' }, - extra: { findingId: finding.id, status: result.status, error: result.error }, - }); - return createFallbackTriage(`API error: ${result.status}`); - } - - const choice = result.data.choices?.[0]; - if (!choice) { - console.error('[Triage] No choice in response'); - return createFallbackTriage('No response from LLM'); - } - - const message = choice.message; - const toolCall = message.tool_calls?.[0]; - - if (!toolCall || toolCall.type !== 'function') { - console.error('[Triage] No tool call in response'); - return createFallbackTriage('LLM did not call the triage tool'); - } - - if (toolCall.function.name !== 'submit_triage_result') { - console.error('[Triage] Unexpected tool call:', toolCall.function.name); - return createFallbackTriage(`Unexpected tool: ${toolCall.function.name}`); - } - - const triageResult = parseTriageResult(toolCall.function.arguments); - if (!triageResult) { - return createFallbackTriage('Failed to parse triage result'); - } - - console.log('[Triage] Triage complete:', { - findingId: finding.id, - needsSandboxAnalysis: triageResult.needsSandboxAnalysis, - needsSandboxReasoning: triageResult.needsSandboxReasoning, - suggestedAction: triageResult.suggestedAction, - confidence: triageResult.confidence, - }); + const triageResult = await startSpan( + { name: 'security-agent.triage', op: 'ai.inference' }, + async span => { + span.setAttribute('security_agent.model', model); + span.setAttribute('security_agent.finding_id', finding.id); + span.setAttribute('security_agent.correlation_id', correlationId); + + const llmStart = performance.now(); + + const result = await sendProxiedChatCompletion({ + authToken, + version: TRIAGE_SERVICE_VERSION, + userAgent: TRIAGE_SERVICE_USER_AGENT, + body: { + model, + messages, + tools: [SUBMIT_TRIAGE_TOOL], + tool_choice: { + type: 'function', + function: { name: 'submit_triage_result' }, + }, + }, + organizationId, + }); + + const durationMs = Math.round(performance.now() - llmStart); + span.setAttribute('security_agent.duration_ms', durationMs); + + if (!result.ok) { + logError('Triage API error', { + correlationId, + findingId: finding.id, + status: result.status, + }); + captureException(new Error(`Triage API error: ${result.status}`), { + tags: { operation: 'triageSecurityFinding' }, + extra: { + findingId: finding.id, + status: result.status, + error: result.error, + correlationId, + }, + }); + + span.setAttribute('security_agent.status', 'error'); + span.setAttribute('security_agent.is_fallback', true); + + addBreadcrumb({ + category: 'security-agent.triage', + message: 'Triage fallback used', + level: 'warning', + data: { correlationId, findingId: finding.id, isFallback: true }, + }); + + return createFallbackTriage(`API error: ${result.status}`); + } + + // Set token usage on span + const usage = result.data.usage; + if (usage) { + span.setAttribute('security_agent.input_tokens', usage.prompt_tokens); + span.setAttribute('security_agent.output_tokens', usage.completion_tokens); + } + + // Emit API metrics (only if o11y client secret is configured) + if (usage && userId && O11Y_KILO_GATEWAY_CLIENT_SECRET) { + const responseToolCalls = result.data.choices?.[0]?.message?.tool_calls ?? []; + const toolsUsed = responseToolCalls + .filter(tc => tc.type === 'function') + .map(tc => `function:${tc.function.name}`); + + emitApiMetrics({ + clientSecret: O11Y_KILO_GATEWAY_CLIENT_SECRET, + kiloUserId: userId, + organizationId, + isAnonymous: false, + isStreaming: false, + userByok: false, + mode: 'security-agent-triage', + provider: 'anthropic', + requestedModel: model, + resolvedModel: model, + toolsAvailable: ['function:submit_triage_result'], + toolsUsed, + ttfbMs: durationMs, + completeRequestMs: durationMs, + statusCode: 200, + tokens: { + inputTokens: usage.prompt_tokens, + outputTokens: usage.completion_tokens, + totalTokens: usage.total_tokens, + }, + }); + } + + const choice = result.data.choices?.[0]; + if (!choice) { + logError('No choice in response', { correlationId, findingId: finding.id }); + span.setAttribute('security_agent.is_fallback', true); + return createFallbackTriage('No response from LLM'); + } + + const message = choice.message; + const toolCall = message.tool_calls?.[0]; + + if (!toolCall || toolCall.type !== 'function') { + logError('No tool call in response', { correlationId, findingId: finding.id }); + span.setAttribute('security_agent.is_fallback', true); + return createFallbackTriage('LLM did not call the triage tool'); + } + + if (toolCall.function.name !== 'submit_triage_result') { + logError('Unexpected tool call', { correlationId, findingId: finding.id, tool: toolCall.function.name }); + span.setAttribute('security_agent.is_fallback', true); + return createFallbackTriage(`Unexpected tool: ${toolCall.function.name}`); + } + + const parsed = parseTriageResult(toolCall.function.arguments); + if (!parsed) { + span.setAttribute('security_agent.is_fallback', true); + return createFallbackTriage('Failed to parse triage result'); + } + + span.setAttribute('security_agent.status', 'success'); + span.setAttribute('security_agent.suggested_action', parsed.suggestedAction); + span.setAttribute('security_agent.confidence', parsed.confidence); + span.setAttribute('security_agent.is_fallback', false); + + log('Triage complete', { + correlationId, + findingId: finding.id, + suggestedAction: parsed.suggestedAction, + confidence: parsed.confidence, + needsSandboxAnalysis: parsed.needsSandboxAnalysis, + }); + + return parsed; + } + ); return triageResult; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); - console.error('[Triage] Error during triage:', errorMessage); + logError('Error during triage', { correlationId, findingId: finding.id, error: errorMessage }); captureException(error, { tags: { operation: 'triageSecurityFinding' }, - extra: { findingId: finding.id }, + extra: { findingId: finding.id, correlationId }, }); + + addBreadcrumb({ + category: 'security-agent.triage', + message: 'Triage fallback used', + level: 'warning', + data: { correlationId, findingId: finding.id, isFallback: true }, + }); + return createFallbackTriage(errorMessage); } }