Skip to content

Commit 461b582

Browse files
committed
modified: api/schemas/schemas.go
modified: internal/analysis/active/taint/analyzer.go modified: internal/analysis/active/taint/types.go modified: internal/knowledgegraph/knowledgegraph.go modified: internal/results/enrich.go modified: internal/results/pipeline.go new file: internal/results/providers/cwe_providers.go
1 parent 3fa2e5f commit 461b582

7 files changed

Lines changed: 769 additions & 673 deletions

File tree

api/schemas/schemas.go

Lines changed: 189 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,58 @@ import (
1414
type TaskType string

// Task type identifiers. Each constant's string value is the wire
// representation carried in Task.Type.
const (
	TaskAgentMission          TaskType = "AGENT_MISSION"
	TaskAnalyzeWebPageTaint   TaskType = "ANALYZE_WEB_PAGE_TAINT"
	TaskAnalyzeWebPageProtoPP TaskType = "ANALYZE_WEB_PAGE_PROTOPP"
	TaskTestRaceCondition     TaskType = "TEST_RACE_CONDITION"
	TaskTestAuthATO           TaskType = "TEST_AUTH_ATO"
	TaskTestAuthIDOR          TaskType = "TEST_AUTH_IDOR"
	TaskAnalyzeHeaders        TaskType = "ANALYZE_HEADERS"
	TaskAnalyzeJWT            TaskType = "ANALYZE_JWT"
)
2626

27+
// Task represents a unit of work to be executed by the engine.
28+
// This is central to how the system decouples discovery from execution.
29+
type Task struct {
30+
TaskID string `json:"task_id"`
31+
ScanID string `json:"scan_id"`
32+
Type TaskType `json:"type"`
33+
TargetURL string `json:"target_url"`
34+
Parameters interface{} `json:"parameters"` // Holds task specific configuration.
35+
}
36+
37+
// -- Task Parameter Definitions --
38+
39+
// ATOTaskParams defines parameters for the Account Takeover task.
type ATOTaskParams struct {
	// Usernames is serialized as "usernames".
	Usernames []string `json:"usernames"`
}
43+
44+
// IDORTaskParams defines parameters for the IDOR task.
// NOTE(review): the fields presumably describe the HTTP request to replay
// during the test; confirm against the IDOR analyzer.
type IDORTaskParams struct {
	// HTTPMethod is serialized as "http_method".
	HTTPMethod string `json:"http_method"`
	// HTTPBody is serialized as "http_body".
	HTTPBody string `json:"http_body"`
	// HTTPHeaders is serialized as "http_headers".
	HTTPHeaders map[string]string `json:"http_headers"`
}
50+
51+
// JWTTaskParams defines parameters for the JWT analysis task.
type JWTTaskParams struct {
	// Token is serialized as "token".
	Token string `json:"token"`
}
55+
56+
// AgentMissionParams defines parameters for the Agent mission task.
type AgentMissionParams struct {
	// MissionBrief is serialized as "mission_brief".
	MissionBrief string `json:"mission_brief"`
}
60+
2761
// Severity defines the severity level of a finding.
type Severity string

// Severity levels. Each constant's string value is the wire
// representation carried in Finding.Severity.
const (
	SeverityCritical      Severity = "CRITICAL"
	SeverityHigh          Severity = "HIGH"
	SeverityMedium        Severity = "MEDIUM"
	SeverityLow           Severity = "LOW"
	SeverityInformational Severity = "INFORMATIONAL"
)
3771

@@ -44,25 +78,101 @@ type Vulnerability struct {
4478
// Finding represents a specific instance of a vulnerability discovered during a scan.
4579
type Finding struct {
4680
ID string `json:"id"`
81+
ScanID string `json:"scan_id"`
4782
TaskID string `json:"task_id"`
4883
Timestamp time.Time `json:"timestamp"`
49-
Target string `json:"target"` // The specific URL or asset where the finding was discovered.
50-
Module string `json:"module"` // The name of the module/engine that produced the finding.
84+
Target string `json:"target"`
85+
Module string `json:"module"`
5186
Vulnerability Vulnerability `json:"vulnerability"`
5287
Severity Severity `json:"severity"`
53-
Description string `json:"description"` // Specific details about this particular finding.
54-
Evidence string `json:"evidence"` // Concrete evidence, like a request/response pair or log entry.
55-
Recommendation string `json:"recommendation"` // Steps to mitigate or fix the vulnerability.
56-
CWE []string `json:"cwe,omitempty"` // Associated CWEs.
88+
Description string `json:"description"`
89+
Evidence string `json:"evidence"`
90+
Recommendation string `json:"recommendation"`
91+
CWE []string `json:"cwe,omitempty"`
5792
}
5893

94+
// -- IAST (Interactive Application Security Testing) Schemas --
95+
96+
// ProbeType defines the category of the attack payload.
type ProbeType string

// Probe categories.
const (
	ProbeTypeXSS                ProbeType = "XSS"
	ProbeTypeSSTI               ProbeType = "SSTI" // Server-Side Template Injection
	ProbeTypeSQLi               ProbeType = "SQLI" // SQL Injection
	ProbeTypeCmdInjection       ProbeType = "CMD_INJECTION"
	ProbeTypeOAST               ProbeType = "OAST" // Out-of-Band Application Security Testing
	ProbeTypeDOMClobbering      ProbeType = "DOM_CLOBBERING"
	ProbeTypePrototypePollution ProbeType = "PROTOTYPE_POLLUTION"
	ProbeTypeGeneric            ProbeType = "GENERIC" // For generic data flow tracking.
)
109+
110+
// TaintSource identifies where the tainted data originated.
type TaintSource string

// Taint sources, grouped by the kind of channel the data arrives through.
const (
	// Client side persistent storage
	SourceCookie         TaintSource = "COOKIE"
	SourceLocalStorage   TaintSource = "LOCAL_STORAGE"
	SourceSessionStorage TaintSource = "SESSION_STORAGE"

	// Client side transient sources
	SourceURLParam     TaintSource = "URL_PARAM"
	SourceHashFragment TaintSource = "HASH_FRAGMENT"
	SourceReferer      TaintSource = "REFERER"
	SourceDOMInput     TaintSource = "DOM_INPUT" // Data entered via forms/interaction.
	SourceDOM          TaintSource = "DOM"       // Data read from existing DOM (e.g., window.name).

	// Communication channels
	SourceWebSocket   TaintSource = "WEB_SOCKET"   // Data received from server via WebSocket.
	SourcePostMessage TaintSource = "POST_MESSAGE" // Data received from other windows/workers.
)
130+
131+
// TaintSink identifies the dangerous function or property where tainted data landed.
type TaintSink string

// Taint sinks, grouped by the kind of effect the sink has.
//
// NOTE(review): SinkFetch_URL and SinkXMLHTTPRequest_URL use underscores,
// which deviates from Go's MixedCaps convention. They are kept as-is here
// because renaming exported constants would break existing callers.
const (
	// Execution Sinks
	SinkEval                TaintSink = "EVAL"
	SinkFunctionConstructor TaintSink = "FUNCTION_CONSTRUCTOR"

	// DOM Manipulation Sinks
	SinkInnerHTML     TaintSink = "INNER_HTML"
	SinkOuterHTML     TaintSink = "OUTER_HTML"
	SinkDocumentWrite TaintSink = "DOCUMENT_WRITE"

	// Resource Loading Sinks
	SinkScriptSrc    TaintSink = "SCRIPT_SRC"
	SinkIframeSrc    TaintSink = "IFRAME_SRC"
	SinkIframeSrcDoc TaintSink = "IFRAME_SRCDOC"
	SinkWorkerSrc    TaintSink = "WORKER_SRC"

	// Navigation Sinks
	SinkNavigation TaintSink = "NAVIGATION" // e.g., location.href, window.open

	// Network/Exfiltration Sinks
	SinkFetch              TaintSink = "FETCH_BODY"
	SinkFetch_URL          TaintSink = "FETCH_URL"
	SinkXMLHTTPRequest     TaintSink = "XHR_BODY"
	SinkXMLHTTPRequest_URL TaintSink = "XHR_URL"
	SinkWebSocketSend      TaintSink = "WEBSOCKET_SEND"
	SinkSendBeacon         TaintSink = "SEND_BEACON"

	// IPC Sinks
	SinkPostMessage       TaintSink = "POST_MESSAGE"
	SinkWorkerPostMessage TaintSink = "WORKER_POST_MESSAGE"

	// Special Confirmation Sinks (High Confidence)
	SinkExecution          TaintSink = "EXECUTION_PROOF"
	SinkOASTInteraction    TaintSink = "OAST_INTERACTION"
	SinkPrototypePollution TaintSink = "PROTOTYPE_POLLUTION_CONFIRMED"
)
170+
59171
// -- Canonical Knowledge Graph Data Model --
60172

61173
// NodeType defines the type of a node in the knowledge graph.
// Using a dedicated type enhances clarity and allows for compile time checks.
type NodeType string
64175

65-
// These provide a controlled vocabulary for node types.
66176
const (
67177
NodeHost NodeType = "HOST"
68178
NodeIPAddress NodeType = "IP_ADDRESS"
@@ -75,12 +185,12 @@ const (
75185
NodeObservation NodeType = "OBSERVATION"
76186
NodeTool NodeType = "TOOL"
77187
NodeFile NodeType = "FILE"
188+
NodeDomain NodeType = "DOMAIN"
78189
)
79190

80-
// RelationshipType defines the type of an edge between nodes in the knowledge graph.
type RelationshipType string
82193

83-
// These establish a formal set of relationship labels.
84194
const (
85195
RelationshipResolvesTo RelationshipType = "RESOLVES_TO"
86196
RelationshipLinksTo RelationshipType = "LINKS_TO"
@@ -89,6 +199,8 @@ const (
89199
RelationshipExposes RelationshipType = "EXPOSES"
90200
RelationshipExecuted RelationshipType = "EXECUTED"
91201
RelationshipHasObservation RelationshipType = "HAS_OBSERVATION"
202+
RelationshipHostsURL RelationshipType = "HOSTS_URL"
203+
RelationshipHasSubdomain RelationshipType = "HAS_SUBDOMAIN"
92204
)
93205

94206
// NodeStatus defines the state of a node, useful for tracking analysis progress.
@@ -102,68 +214,58 @@ const (
102214
)
103215

104216
// Node represents a single entity in the Knowledge Graph.
105-
// This struct is the canonical representation for ALL nodes.
106-
// It is aligned with the KnowledgeGraph interface and database schema.
107217
type Node struct {
108-
// A universally unique identifier for the node.
109-
// Stored as a string to be compatible with the KG interface.
110-
ID string `json:"id"`
111-
112-
// Type categorizes the node, using the predefined NodeType constants.
113-
Type NodeType `json:"type"`
114-
115-
// A human readable label for the node.
116-
Label string `json:"label"`
117-
118-
// Status tracks the analysis state of the node.
119-
Status NodeStatus `json:"status"`
120-
121-
// An open map to store any additional properties.
122-
// Using json.RawMessage is a robust way to handle arbitrary JSON data
123-
// without the performance hit of map[string]interface{}.
218+
ID string `json:"id"`
219+
Type NodeType `json:"type"`
220+
Label string `json:"label"`
221+
Status NodeStatus `json:"status"`
124222
Properties json.RawMessage `json:"properties"`
125-
126-
// Timestamps for tracking when the node was first created and last seen.
127-
CreatedAt time.Time `json:"created_at"`
128-
LastSeen time.Time `json:"last_seen"`
223+
CreatedAt time.Time `json:"created_at"`
224+
LastSeen time.Time `json:"last_seen"`
129225
}
130226

131-
// Edge represents a directed, labeled relationship between two nodes in the Knowledge Graph.
132-
// This struct is the canonical representation for ALL edges.
227+
// Edge represents a directed, labeled relationship between two nodes.
133228
type Edge struct {
134-
// A universally unique identifier for the edge.
135-
ID string `json:"id"`
136-
137-
// The ID of the node where the edge originates. Renamed from Source for consistency.
138-
From string `json:"from"`
139-
140-
// The ID of the node where the edge terminates. Renamed from Target for consistency.
141-
To string `json:"to"`
142-
143-
// Type describes the nature of the relationship, using predefined RelationshipType constants.
144-
// Renamed from Label for clarity and consistency with the database schema.
145-
Type RelationshipType `json:"type"`
146-
147-
// A human readable label for the edge, for display or contextual purposes.
148-
Label string `json:"label"`
149-
150-
// An open map to store additional properties, such as weights or timestamps.
151-
Properties json.RawMessage `json:"properties"`
152-
153-
// Timestamps for tracking when the edge was first created and last seen.
154-
CreatedAt time.Time `json:"created_at"`
155-
LastSeen time.Time `json:"last_seen"`
229+
ID string `json:"id"`
230+
From string `json:"from"`
231+
To string `json:"to"`
232+
Type RelationshipType `json:"type"`
233+
Label string `json:"label"`
234+
Properties json.RawMessage `json:"properties"`
235+
CreatedAt time.Time `json:"created_at"`
236+
LastSeen time.Time `json:"last_seen"`
156237
}
157238

158-
// KnowledgeGraphUpdate represents a batch of updates to be applied to the knowledge graph.
239+
// KnowledgeGraphUpdate represents a batch of updates for the knowledge graph.
159240
type KnowledgeGraphUpdate struct {
160241
Nodes []Node `json:"nodes"`
161242
Edges []Edge `json:"edges"`
162243
}
163244

245+
// -- Input Schemas for Bulk Operations --
246+
247+
// NodeInput is a helper struct for bulk inserting or updating nodes.
248+
type NodeInput struct {
249+
ID string
250+
Type NodeType
251+
Label string
252+
Status NodeStatus
253+
Properties json.RawMessage
254+
}
255+
256+
// EdgeInput is a helper struct for bulk inserting or updating edges.
257+
type EdgeInput struct {
258+
ID string
259+
From string // Source Node ID
260+
To string // Target Node ID
261+
Type RelationshipType
262+
Label string
263+
Properties json.RawMessage
264+
}
265+
164266
// -- Communication & Result Schemas --
165267

166-
// ResultEnvelope is the top-level wrapper for all results produced by a single task.
268+
// ResultEnvelope is the top level wrapper for all results from a single task.
167269
type ResultEnvelope struct {
168270
ScanID string `json:"scan_id"`
169271
TaskID string `json:"task_id"`
@@ -174,13 +276,13 @@ type ResultEnvelope struct {
174276

175277
// -- Browser & Artifact Schemas --
176278

177-
// InteractionConfig defines the parameters for the automated page interactor.
type InteractionConfig struct {
	// MaxDepth is serialized as "max_depth".
	MaxDepth int `json:"max_depth"`
	// MaxInteractionsPerDepth is serialized as "max_interactions_per_depth".
	MaxInteractionsPerDepth int `json:"max_interactions_per_depth"`
	// InteractionDelayMs is serialized as "interaction_delay_ms".
	// NOTE(review): the Ms suffix suggests milliseconds; confirm at the call site.
	InteractionDelayMs int `json:"interaction_delay_ms"`
	// PostInteractionWaitMs is serialized as "post_interaction_wait_ms".
	// NOTE(review): the Ms suffix suggests milliseconds; confirm at the call site.
	PostInteractionWaitMs int `json:"post_interaction_wait_ms"`
	// CustomInputData is user provided data for specific inputs
	// (key: 'id' or 'name' attribute). It is omitted from JSON when empty.
	CustomInputData map[string]string `json:"custom_input_data,omitempty"`
}
185287

186288
// ConsoleLog represents a single entry from the browser's console.
@@ -246,13 +348,13 @@ type PageTimings struct {
246348
}
247349

248350
type Entry struct {
249-
Pageref string `json:"pageref"`
351+
Pageref string `json:"pageref"`
250352
StartedDateTime time.Time `json:"startedDateTime"`
251-
Time float64 `json:"time"`
252-
Request Request `json:"request"`
253-
Response Response `json:"response"`
254-
Cache struct{} `json:"cache"`
255-
Timings Timings `json:"timings"`
353+
Time float64 `json:"time"`
354+
Request Request `json:"request"`
355+
Response Response `json:"response"`
356+
Cache struct{} `json:"cache"`
357+
Timings Timings `json:"timings"`
256358
}
257359

258360
type Request struct {
@@ -360,16 +462,16 @@ type LLMClient interface {
360462

361463
// -- Engine Interfaces --
362464

363-
// DiscoveryEngine defines the interface for an engine that discovers potential targets or tasks.
465+
// DiscoveryEngine defines the interface for an engine that discovers potential tasks.
364466
type DiscoveryEngine interface {
365-
// Start begins the discovery process, returning a channel that will stream findings.
366-
Start(ctx context.Context, targets string) (<-chan Finding, error)
467+
// Start kicks off the discovery process, returning a channel that will stream tasks.
468+
Start(ctx context.Context, targets []string) (<-chan Task, error)
367469
Stop()
368470
}
369471

370-
// TaskEngine defines the interface for an engine that executes tasks based on findings.
472+
// TaskEngine defines the interface for an engine that executes tasks.
371473
type TaskEngine interface {
372-
// Start begins processing findings from a channel.
373-
Start(ctx context.Context, taskChan <-chan Finding)
474+
// Start begins processing tasks from a channel.
475+
Start(ctx context.Context, taskChan <-chan Task)
374476
Stop()
375-
}
477+
}

0 commit comments

Comments
 (0)