Skip to content

Commit ee8e038

Browse files
authored
Remote command execution in microVM instances via vsock (#9)
* Add Dockerfile for initial ramdisk environment * Exec API * Fix exec test * Use websocket * Fix vsocket protocol handling and remove initrd versioning * exec works but this commit needs de-cruft * Cleanup test * Simplify initrd build + test * Simplify initrd by removing versioning and removing docker build * Delete extra file * Fix comment * Fix non deterministic test result * Improve error message from test * More logging * Switch to gRPC over vsock * Build before test * Extract exec, avoid circular dependency * Add README * POC cli for exec * Improvements 1. Protocol Extensions (lib/exec/exec.proto) - Added user (string) and uid (int32) fields to ExecStart - Added env (map<string,string>) for environment variables - Added cwd (string) for working directory - Added timeout_seconds (int32) for execution timeout - Added SignalType enum with common Unix signals (SIGHUP, SIGINT, SIGQUIT, SIGKILL, SIGTERM, SIGSTOP, SIGCONT) - Added Signal message to ExecRequest oneof 2. Guest Agent (lib/system/exec_agent/main.go) - Implemented user/UID switching using syscall.Credential - Environment variable merging with buildEnv() - Working directory support - Signal handling via sendSignal() helper - Timeout support using context with deadline - Enhanced logging with all execution parameters 3. Client Library (lib/exec/client.go) - Added User, UID, Env, Cwd, Timeout fields to ExecOptions - All fields properly wired through to gRPC ExecStart message 4. API Layer (cmd/api/api/exec.go) - Changed to POST endpoint (semantically correct for mutations) - Reads JSON configuration from first WebSocket message - Comprehensive audit logging: JWT subject, instance ID, command, all parameters, start/end time, exit code, duration - Sends exit code in final JSON message: {"exitCode": N} 5. API Routing (cmd/api/main.go) - Changed route from Get() to Post() for /instances/{id}/exec 6. CLI Tool (cmd/exec/main.go) - Added flags: --user, --uid, --env/-e, --cwd, --timeout - Auto-detect TTY (if stdin/stdout are terminals) - Sends JSON as first WebSocket message - Parses exit code from final JSON message - Exits with remote exit code (or 255 for transport errors, 130 for Ctrl-C) 7. Documentation (lib/exec/README.md) - Updated API endpoint documentation - Documented all new parameters and their usage - Added comprehensive CLI examples - Added Security & Authorization section - Added Observability section with log examples - Documented signal support and timeout behavior - Documented exit codes * Must use GET * chroot before running exec-agent, and wip to fix signal handling * Delete partially working: user, window resizing * Delete redundant README
1 parent 3cfa5fd commit ee8e038

32 files changed

Lines changed: 2774 additions & 768 deletions

.github/workflows/test.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ jobs:
3030
with:
3131
username: ${{ secrets.DOCKERHUB_USERNAME }}
3232
password: ${{ secrets.DOCKERHUB_PASSWORD }}
33-
34-
- name: Run tests
35-
run: make test
36-
33+
3734
- name: Build
3835
run: make build
36+
37+
- name: Run tests
38+
run: make test

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,6 @@ tmp/**
1717

1818
# Cloud Hypervisor binaries (embedded at build time)
1919
lib/vmm/binaries/cloud-hypervisor/*/*/cloud-hypervisor
20+
cloud-hypervisor
21+
cloud-hypervisor/**
22+
lib/system/exec_agent/exec-agent

Makefile

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,15 @@ generate-wire: $(WIRE)
7676
@echo "Generating wire code..."
7777
cd ./cmd/api && $(WIRE)
7878

79+
# Generate gRPC code from proto
80+
generate-grpc:
81+
@echo "Generating gRPC code from proto..."
82+
protoc --go_out=. --go_opt=paths=source_relative \
83+
--go-grpc_out=. --go-grpc_opt=paths=source_relative \
84+
lib/exec/exec.proto
85+
7986
# Generate all code
80-
generate-all: oapi-generate generate-vmm-client generate-wire
87+
generate-all: oapi-generate generate-vmm-client generate-wire generate-grpc
8188

8289
# Check if binaries exist, download if missing
8390
.PHONY: ensure-ch-binaries
@@ -87,16 +94,28 @@ ensure-ch-binaries:
8794
$(MAKE) download-ch-binaries; \
8895
fi
8996

97+
# Build exec-agent (guest binary) into its own directory for embedding
98+
lib/system/exec_agent/exec-agent: lib/system/exec_agent/main.go
99+
@echo "Building exec-agent..."
100+
cd lib/system/exec_agent && CGO_ENABLED=0 go build -ldflags="-s -w" -o exec-agent .
101+
90102
# Build the binary
91-
build: ensure-ch-binaries | $(BIN_DIR)
103+
build: ensure-ch-binaries lib/system/exec_agent/exec-agent | $(BIN_DIR)
92104
go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api
93105

106+
# Build exec CLI
107+
build-exec: | $(BIN_DIR)
108+
go build -o $(BIN_DIR)/hypeman-exec ./cmd/exec
109+
110+
# Build all binaries
111+
build-all: build build-exec
112+
94113
# Run in development mode with hot reload
95114
dev: $(AIR)
96115
$(AIR) -c .air.toml
97116

98117
# Run tests
99-
test: ensure-ch-binaries
118+
test: ensure-ch-binaries lib/system/exec_agent/exec-agent
100119
go test -tags containers_image_openpgp -v -timeout 30s ./...
101120

102121
# Generate JWT token for testing
@@ -109,4 +128,7 @@ clean:
109128
rm -rf $(BIN_DIR)
110129
rm -f lib/oapi/oapi.go
111130
rm -f lib/vmm/vmm.go
131+
rm -f lib/exec/exec.pb.go
132+
rm -f lib/exec/exec_grpc.pb.go
133+
rm -f lib/system/exec_agent/exec-agent
112134

cmd/api/api/exec.go

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
package api
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"io"
9+
"net/http"
10+
"sync"
11+
"time"
12+
13+
"github.com/go-chi/chi/v5"
14+
"github.com/gorilla/websocket"
15+
"github.com/onkernel/hypeman/lib/exec"
16+
"github.com/onkernel/hypeman/lib/instances"
17+
"github.com/onkernel/hypeman/lib/logger"
18+
)
19+
20+
var upgrader = websocket.Upgrader{
21+
ReadBufferSize: 32 * 1024,
22+
WriteBufferSize: 32 * 1024,
23+
CheckOrigin: func(r *http.Request) bool {
24+
// Allow all origins for now - can be tightened in production
25+
return true
26+
},
27+
}
28+
29+
// ExecRequest represents the JSON body for exec requests
30+
type ExecRequest struct {
31+
Command []string `json:"command"`
32+
TTY bool `json:"tty"`
33+
Env map[string]string `json:"env,omitempty"`
34+
Cwd string `json:"cwd,omitempty"`
35+
Timeout int32 `json:"timeout,omitempty"` // seconds
36+
}
37+
38+
// ExecHandler handles exec requests via WebSocket for bidirectional streaming
39+
func (s *ApiService) ExecHandler(w http.ResponseWriter, r *http.Request) {
40+
ctx := r.Context()
41+
log := logger.FromContext(ctx)
42+
startTime := time.Now()
43+
44+
instanceID := chi.URLParam(r, "id")
45+
46+
// Get instance
47+
inst, err := s.InstanceManager.GetInstance(ctx, instanceID)
48+
if err != nil {
49+
if err == instances.ErrNotFound {
50+
http.Error(w, `{"code":"not_found","message":"instance not found"}`, http.StatusNotFound)
51+
return
52+
}
53+
log.ErrorContext(ctx, "failed to get instance", "error", err)
54+
http.Error(w, `{"code":"internal_error","message":"failed to get instance"}`, http.StatusInternalServerError)
55+
return
56+
}
57+
58+
if inst.State != instances.StateRunning {
59+
http.Error(w, fmt.Sprintf(`{"code":"invalid_state","message":"instance must be running (current state: %s)"}`, inst.State), http.StatusConflict)
60+
return
61+
}
62+
63+
// Upgrade to WebSocket first
64+
ws, err := upgrader.Upgrade(w, r, nil)
65+
if err != nil {
66+
log.ErrorContext(ctx, "websocket upgrade failed", "error", err)
67+
return
68+
}
69+
defer ws.Close()
70+
71+
// Read JSON request from first WebSocket message
72+
msgType, message, err := ws.ReadMessage()
73+
if err != nil {
74+
log.ErrorContext(ctx, "failed to read exec request", "error", err)
75+
ws.WriteMessage(websocket.TextMessage, []byte(fmt.Sprintf(`{"error":"failed to read request: %v"}`, err)))
76+
return
77+
}
78+
79+
if msgType != websocket.TextMessage {
80+
log.ErrorContext(ctx, "expected text message with JSON request", "type", msgType)
81+
ws.WriteMessage(websocket.TextMessage, []byte(`{"error":"first message must be JSON text"}`))
82+
return
83+
}
84+
85+
// Parse JSON request
86+
var execReq ExecRequest
87+
if err := json.Unmarshal(message, &execReq); err != nil {
88+
log.ErrorContext(ctx, "invalid JSON request", "error", err)
89+
ws.WriteMessage(websocket.TextMessage, []byte(fmt.Sprintf(`{"error":"invalid JSON: %v"}`, err)))
90+
return
91+
}
92+
93+
// Default command if not specified
94+
if len(execReq.Command) == 0 {
95+
execReq.Command = []string{"/bin/sh"}
96+
}
97+
98+
// Get JWT subject for audit logging (if available)
99+
subject := "unknown"
100+
if claims, ok := r.Context().Value("claims").(map[string]interface{}); ok {
101+
if sub, ok := claims["sub"].(string); ok {
102+
subject = sub
103+
}
104+
}
105+
106+
// Audit log: exec session started
107+
log.InfoContext(ctx, "exec session started",
108+
"instance_id", instanceID,
109+
"subject", subject,
110+
"command", execReq.Command,
111+
"tty", execReq.TTY,
112+
"cwd", execReq.Cwd,
113+
"timeout", execReq.Timeout,
114+
)
115+
116+
// Create WebSocket read/writer wrapper
117+
wsConn := &wsReadWriter{ws: ws, ctx: ctx}
118+
119+
// Execute via vsock
120+
exit, err := exec.ExecIntoInstance(ctx, inst.VsockSocket, exec.ExecOptions{
121+
Command: execReq.Command,
122+
Stdin: wsConn,
123+
Stdout: wsConn,
124+
Stderr: wsConn,
125+
TTY: execReq.TTY,
126+
Env: execReq.Env,
127+
Cwd: execReq.Cwd,
128+
Timeout: execReq.Timeout,
129+
})
130+
131+
duration := time.Since(startTime)
132+
133+
if err != nil {
134+
log.ErrorContext(ctx, "exec failed",
135+
"error", err,
136+
"instance_id", instanceID,
137+
"subject", subject,
138+
"duration_ms", duration.Milliseconds(),
139+
)
140+
// Send error message over WebSocket before closing
141+
ws.WriteMessage(websocket.TextMessage, []byte(fmt.Sprintf("Error: %v", err)))
142+
return
143+
}
144+
145+
// Audit log: exec session ended
146+
log.InfoContext(ctx, "exec session ended",
147+
"instance_id", instanceID,
148+
"subject", subject,
149+
"exit_code", exit.Code,
150+
"duration_ms", duration.Milliseconds(),
151+
)
152+
153+
// Send close frame with exit code in JSON
154+
closeMsg := fmt.Sprintf(`{"exitCode":%d}`, exit.Code)
155+
ws.WriteMessage(websocket.TextMessage, []byte(closeMsg))
156+
}
157+
158+
// wsReadWriter wraps a WebSocket connection to implement io.ReadWriter
159+
type wsReadWriter struct {
160+
ws *websocket.Conn
161+
ctx context.Context
162+
reader io.Reader
163+
mu sync.Mutex
164+
}
165+
166+
func (w *wsReadWriter) Read(p []byte) (n int, err error) {
167+
w.mu.Lock()
168+
defer w.mu.Unlock()
169+
170+
// If we have a pending reader, continue reading from it
171+
if w.reader != nil {
172+
n, err = w.reader.Read(p)
173+
if err != io.EOF {
174+
return n, err
175+
}
176+
// EOF means we finished this message, get next one
177+
w.reader = nil
178+
}
179+
180+
// Read next WebSocket message
181+
messageType, data, err := w.ws.ReadMessage()
182+
if err != nil {
183+
return 0, err
184+
}
185+
186+
// Only handle binary and text messages
187+
if messageType != websocket.BinaryMessage && messageType != websocket.TextMessage {
188+
return 0, fmt.Errorf("unexpected message type: %d", messageType)
189+
}
190+
191+
// Create reader for this message
192+
w.reader = bytes.NewReader(data)
193+
return w.reader.Read(p)
194+
}
195+
196+
func (w *wsReadWriter) Write(p []byte) (n int, err error) {
197+
if err := w.ws.WriteMessage(websocket.BinaryMessage, p); err != nil {
198+
return 0, err
199+
}
200+
return len(p), nil
201+
}
202+

0 commit comments

Comments
 (0)