Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .codacyrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"exclude_paths": [
"banners/**",
"docs/**",
"modules/rootkit/**",
"parquets/**",
"persistent_chroma_db/**",
"sessions/**",
"static/**",
"test/**",
"external/**",
"**/*.png",
"**/*.ico",
"**/*.ttf",
"**/*.so",
"**/*.o",
"**/*.pyc",
"**/*.db",
"**/*.bin",
"**/*.exe",
"**/*.elf",
"**/*.macho",
"**/*.gz"
]
}
21 changes: 21 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
banners/
docs/
modules/rootkit/
parquets/
persistent_chroma_db/
sessions/
static/
test/
external/
*.png
*.ico
*.ttf
*.so
*.o
*.pyc
*.db
*.bin
*.exe
*.elf
*.macho
*.gz
27 changes: 27 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"env": {
"browser": true,
"es2021": true,
"node": true
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 12,
"sourceType": "module"
},
"rules": {},
"ignorePatterns": [
"banners/",
"docs/",
"modules/rootkit/",
"parquets/",
"persistent_chroma_db/",
"sessions/",
"static/",
"*.png",
"*.ico",
"*.ttf",
"*.so",
"*.o"
]
}
3 changes: 3 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[MASTER]
ignore=banners,docs,modules/rootkit,parquets,persistent_chroma_db,sessions,static,test,external
ignore-patterns=.*\.png,.*\.ico,.*\.ttf,.*\.so,.*\.o,.*\.pyc,.*\.db,.*\.bin,.*\.exe,.*\.elf,.*\.macho,.*\.gz
20 changes: 20 additions & 0 deletions modules/moe_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,26 @@ def update(self, reward: float, detection_prob: float) -> None:
"Gemma 2 9B for output analysis, log parsing, and report synthesis."
),
),
ExpertProfile(
expert_id="groq_cloud",
backend="groq",
model="llama-3.3-70b-versatile",
capabilities=["cloud_enum", "cloud_exploit", "iam_analysis"],
base_weight=0.75,
cost_tier=2,
latency_ms=2000,
description="Specialized expert for AWS/Azure/GCP enumeration and IAM exploitation.",
),
Comment on lines +201 to +210
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CRITICAL_BUG] Confirm the model name and backend pairing are valid for your deployment: 'llama-3.3-70b-versatile' + backend='groq' may not be available or may require different identifiers / credentials. Add startup-time validation (or a fallback) that checks the model is reachable and the backend supports it, to avoid runtime failures. For consistency, follow the pattern used for ollama_reason, where the model is read from the environment (see lines ~177-186) — prefer environment configuration for large/externally-hosted models so deployments can opt out or swap models without code changes.

# modules/moe_router.py
ExpertProfile(
    expert_id="groq_cloud",
    backend="groq",
    model=os.environ.get("GROQ_CLOUD_MODEL", "llama-3.3-70b-versatile"),
    capabilities=["cloud_enum", "cloud_exploit", "iam_analysis"],
    base_weight=0.75,
    cost_tier=2,
    latency_ms=2000,
    description="Specialized expert for AWS/Azure/GCP enumeration and IAM exploitation.",
),
ExpertProfile(
    expert_id="groq_container",
    backend="groq",
    model=os.environ.get("GROQ_CONTAINER_MODEL", "llama-3.3-70b-versatile"),
    capabilities=["container_escape", "k8s_enum", "docker_audit"],
    base_weight=0.75,
    cost_tier=2,
    latency_ms=2000,
    description="Specialized expert for Kubernetes, Docker, and container escape techniques.",
),

ExpertProfile(
expert_id="groq_container",
backend="groq",
model="llama-3.3-70b-versatile",
capabilities=["container_escape", "k8s_enum", "docker_audit"],
base_weight=0.75,
cost_tier=2,
latency_ms=2000,
description="Specialized expert for Kubernetes, Docker, and container escape techniques.",
),
Comment on lines +201 to +220
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CRITICAL_BUG] These experts expose capabilities like 'cloud_exploit' and 'iam_analysis', which imply handling sensitive cloud metadata and potentially credentials. Ensure there are safeguards preventing leakage of secrets to externally hosted models (audit logs, redaction, and an explicit policy gating such tasks to trusted/local backends). Add a capability-to-policy mapping in code that requires human approval or uses offline models for dangerous operations.

# modules/moe_router.py (conceptual example – policy wiring likely lives elsewhere)
SENSITIVE_CAPABILITIES = {
    "cloud_exploit": "offline_only",
    "iam_analysis": "offline_preferred",
    "container_escape": "offline_preferred",
}

# When selecting experts, enforce a policy check, e.g. in the router’s selection logic:
if any(cap in SENSITIVE_CAPABILITIES for cap in requested_caps):
    # filter to trusted/local backends
    candidates = [
        e for e in candidates
        if e.backend in ("ollama", "local_llm")
    ]

]


Expand Down
35 changes: 35 additions & 0 deletions modules/obs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class FindingType(str, Enum):
DOMAIN = "domain"
EMAIL = "email"
ERROR = "error"
CLOUD_ROLE = "cloud_role"
K8S_RESOURCE = "k8s_resource"


@dataclass
Expand Down Expand Up @@ -315,6 +317,38 @@ def extract(self, text: str, host: str) -> List[Finding]:
return results


class _CloudIdentityExtractor(Extractor):
"""Extracts IAM roles, ARNs, and K8s resources from cloud tool output."""
# Compiled once at class-definition time; every pattern is scanned over the
# full tool output, and the whole match (group 0) is what gets reported.
_PATTERNS = [
    # AWS IAM ARN: "arn:aws:iam::<12-digit account id>:<resource path>".
    re.compile(r'arn:aws:iam::\d{12}:[a-zA-Z0-9:/._-]+'),
    # Azure resource ID, truncated at the resource-group name.
    # The subscription ID must have the 8-4-4-4-12 GUID shape; the previous
    # "[a-f0-9-]{36}" also accepted arbitrary 36-character runs of hex
    # digits and dashes (e.g. 36 dashes) and rejected upper-case GUIDs.
    # Both "resourceGroups" and "resourcegroups" spellings are accepted.
    # The "/subscriptions/" prefix is deliberately left as a lower-case
    # literal: the extract() method in this file classifies matches with a
    # case-sensitive `"/subscriptions/" in val` test.
    re.compile(
        r'/subscriptions/'
        r'[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
        r'/resource[Gg]roups/[a-zA-Z0-9._-]+'
    ),
    # Kubernetes "<kind>/<name>" references; the name is limited to 63
    # characters of lower-case letters, digits and dashes.
    re.compile(r'\b(pod|deployment|service|namespace|secret)/[a-z0-9-]{1,63}\b'),
]
Comment on lines +323 to +329
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[VALIDATION] Azure resource regex is too permissive and doesn't anchor GUID structure. Current pattern '/subscriptions/[a-f0-9-]{36}/resourceGroups/...' will match noise and is case-sensitive. Use a more precise regex (with case-insensitive flag) to match GUID format: '/subscriptions/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}/resourceGroups/[^/]+'. Also compile the pattern with re.IGNORECASE to accept uppercase hex in GUIDs.

class _CloudIdentityExtractor(Extractor):
    """Extracts IAM roles, ARNs, and K8s resources from cloud tool output."""
    _PATTERNS = [
        # AWS ARN
        re.compile(r"arn:aws:iam::\d{12}:[a-zA-Z0-9:/._-]+"),
        # Azure Resource ID (GUID-anchored, case-insensitive)
        re.compile(
            r"/subscriptions/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}/resourceGroups/[a-zA-Z0-9._-]+",
            re.IGNORECASE,
        ),
        # K8s resources
        re.compile(r"\b(pod|deployment|service|namespace|secret)/[a-z0-9-]{1,63}\b"),
    ]


def extract(self, text: str, host: str) -> List[Finding]:
    """Collect cloud identity and Kubernetes resource references from text.

    Each pattern in ``self._PATTERNS`` is scanned over ``text`` in turn.
    Every distinct matched string produces exactly one Finding; later
    occurrences of the same string (from any pattern) are ignored.

    Args:
        text: Tool output to scan.
        host: Value passed through as the ``host`` of every Finding.

    Returns:
        Findings ordered by pattern, then by position within ``text``.
        Matches containing "arn:aws" or "/subscriptions/" are typed
        CLOUD_ROLE; everything else is typed K8S_RESOURCE.
    """
    findings: List[Finding] = []
    emitted: set = set()
    for pattern in self._PATTERNS:
        for match in pattern.finditer(text):
            token = match.group()
            # Guard clause: report each distinct string only once.
            if token in emitted:
                continue
            emitted.add(token)

            is_cloud_identity = "arn:aws" in token or "/subscriptions/" in token
            kind = (
                FindingType.CLOUD_ROLE
                if is_cloud_identity
                else FindingType.K8S_RESOURCE
            )
            findings.append(Finding(
                kind, token,
                host=host, confidence=0.95, raw=token
            ))
    return findings


Comment on lines +320 to +351
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[VALIDATION] Extraction logic conflates different cloud artifact types and can mislabel Azure resource IDs as CLOUD_ROLE. Instead of defaulting ftype to CLOUD_ROLE and using a simple substring check, explicitly detect and classify: - AWS IAM ARNs (role/user/policy) -> separate FindingType (e.g. AWS_ARN or CLOUD_ROLE with subtype), - Azure resource IDs -> a distinct FindingType (e.g. AZURE_RESOURCE), - K8s matches -> K8S_RESOURCE. Prefer using regex capture groups to determine the exact kind (role vs service vs resource) and populate a subtype or metadata field rather than overloading CLOUD_ROLE.

class _CloudIdentityExtractor(Extractor):
    """Extracts IAM roles, ARNs, Azure resources, and K8s resources from cloud tool output."""
    _PATTERNS = [
        # AWS IAM ARNs: role / user / policy / assumed-role
        re.compile(r"arn:aws:iam::(?P<account>\d{12}):(?P<kind>role|user|policy|assumed-role)/(?P<name>[A-Za-z0-9+=,.@_\-/]+)"),
        # Azure Resource ID (GUID-anchored, case-insensitive)
        re.compile(
            r"/subscriptions/(?P<sub>[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})/resourceGroups/(?P<rg>[^/]+)(?P<rest>/providers/[^\s'\"]+)?",
            re.IGNORECASE,
        ),
        # K8s resources
        re.compile(r"\b(?P<kind>pod|deployment|service|namespace|secret)/(?P<name>[a-z0-9-]{1,63})\b"),
    ]

    def extract(self, text: str, host: str) -> List[Finding]:
        seen: set = set()
        results: List[Finding] = []
        for pat in self._PATTERNS:
            for m in pat.finditer(text):
                raw = m.group(0)
                if raw in seen:
                    continue
                seen.add(raw)

                if raw.startswith("arn:aws:iam::"):
                    ftype = FindingType.CLOUD_ROLE
                elif raw.lower().startswith("/subscriptions/"):
                    ftype = FindingType.CLOUD_ROLE  # or a new AZURE_RESOURCE type if added
                else:
                    ftype = FindingType.K8S_RESOURCE

                results.append(Finding(
                    type=ftype,
                    value=raw,
                    host=host,
                    confidence=0.95,
                    raw=raw,
                ))
        return results

# ---------------------------------------------------------------------------
# Success heuristic
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -372,6 +406,7 @@ def __init__(self) -> None:
_UsernameExtractor(),
_DomainExtractor(),
_ErrorExtractor(),
_CloudIdentityExtractor(),
]:
self._registry.register(ext)

Expand Down
Loading
Loading