diff --git a/.gitignore b/.gitignore index 24d6f58..a87fa65 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .idea/ bin/ .env +cursor-adapter +cursor-mcp-server diff --git a/README.md b/README.md index 389482f..4ea5f24 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,26 @@ cp config.example.yaml ~/.cursor-adapter/config.yaml - `cursor_cli_path`:CLI 可執行檔名或路徑 - `default_model`、`available_models`、`timeout`(秒)、`max_concurrent` - `use_acp`、`chat_only_workspace`、`log_level` +- `cursor_mode`:`plan`(預設,純大腦+ `<tool_call>` 翻譯成 caller 端 tool_use)或 `agent`(讓 Cursor CLI 自己拿 host 的檔案/shell 工具直接執行) +- `workspace_root`:絕對路徑;設了之後子行程就跑在這個資料夾,不再用 chat-only temp dir。`agent` 模式下幾乎都要設。Per-request 用 `X-Cursor-Workspace` header 動態覆蓋。 + +### 兩種典型擺法 + +1. **大腦模式(預設)** + ```yaml + cursor_mode: plan + chat_only_workspace: true + ``` + Cursor CLI 不執行任何東西。proxy 把 system_prompt 注入腦袋,要它輸出 `<tool_call>{...}</tool_call>`,proxy 再翻成 Anthropic `tool_use` 給 caller(Claude Desktop / Claude Code / opencode)跑。caller 必須有自己的 host MCP(例如 desktop-commander)。 + +2. **執行者模式** + ```yaml + cursor_mode: agent + chat_only_workspace: false + workspace_root: /Users/<you>/Desktop + system_prompt: "" # 移掉「你只是大腦」的口令,讓它正常使用工具 + ``` + Cursor CLI 自己用內建 filesystem/shell 工具直接動 `workspace_root`。caller 不需要任何 MCP,整段在 host 上完成;回到 caller 那邊只有最後的文字結論。 ## HTTP 端點 diff --git a/cmd/mcp-server/main.go b/cmd/mcp-server/main.go new file mode 100644 index 0000000..5dde8b4 --- /dev/null +++ b/cmd/mcp-server/main.go @@ -0,0 +1,241 @@ +// Command cursor-mcp-server is a Model Context Protocol (MCP) server that +// exposes the cursor-adapter HTTP API as MCP tools for Claude Desktop. +// +// It communicates with Claude Desktop over stdio (JSON-RPC) and forwards +// requests to a running cursor-adapter instance via HTTP. 
+// +// Usage (standalone): +// +// go run ./cmd/mcp-server +// go run ./cmd/mcp-server --adapter-url http://127.0.0.1:8765 +// +// Usage (Claude Desktop config): +// +// { +// "mcpServers": { +// "cursor-bridge": { +// "command": "/path/to/cursor-mcp-server", +// "args": ["--adapter-url", "http://127.0.0.1:8765"] +// } +// } +// } +package main + +import ( + "bytes" + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "os" + "strings" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +var adapterURL string + +func init() { + flag.StringVar(&adapterURL, "adapter-url", "http://127.0.0.1:8765", "cursor-adapter HTTP base URL") +} + +// --- Tool input/output types --- + +type AskCursorInput struct { + Prompt string `json:"prompt" mcp:"required"` + Model string `json:"model"` +} + +type EmptyInput struct{} + +type TextOutput struct { + Text string `json:"text"` +} + +// --- Tool handlers --- + +func askCursor(ctx context.Context, _ *mcp.CallToolRequest, input AskCursorInput) (*mcp.CallToolResult, TextOutput, error) { + model := input.Model + if model == "" { + model = "claude-opus-4-7-high" + } + + payload := map[string]interface{}{ + "model": model, + "max_tokens": 16384, + "messages": []map[string]string{{"role": "user", "content": input.Prompt}}, + "stream": false, + } + body, _ := json.Marshal(payload) + + httpReq, err := http.NewRequestWithContext(ctx, "POST", adapterURL+"/v1/messages", bytes.NewReader(body)) + if err != nil { + return nil, TextOutput{}, fmt.Errorf("build request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("x-api-key", "mcp-bridge") + + client := &http.Client{Timeout: 5 * time.Minute} + resp, err := client.Do(httpReq) + if err != nil { + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: "❌ Cannot connect to cursor-adapter at " + adapterURL + ". 
Make sure it is running."}}, + IsError: true, + }, TextOutput{}, nil + } + defer resp.Body.Close() + + respBody, _ := io.ReadAll(resp.Body) + if resp.StatusCode != 200 { + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: fmt.Sprintf("❌ cursor-adapter HTTP %d: %s", resp.StatusCode, string(respBody))}}, + IsError: true, + }, TextOutput{}, nil + } + + var data struct { + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content"` + Error *struct { + Message string `json:"message"` + } `json:"error"` + } + if err := json.Unmarshal(respBody, &data); err != nil { + return nil, TextOutput{Text: string(respBody)}, nil + } + + if data.Error != nil { + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: "❌ Cursor error: " + data.Error.Message}}, + IsError: true, + }, TextOutput{}, nil + } + + var texts []string + for _, block := range data.Content { + if block.Type == "text" { + texts = append(texts, block.Text) + } + } + result := strings.Join(texts, "\n") + if result == "" { + result = string(respBody) + } + + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: fmt.Sprintf("[Model: %s]\n\n%s", model, result)}}, + }, TextOutput{Text: result}, nil +} + +func listModels(ctx context.Context, _ *mcp.CallToolRequest, _ EmptyInput) (*mcp.CallToolResult, TextOutput, error) { + httpReq, err := http.NewRequestWithContext(ctx, "GET", adapterURL+"/v1/models", nil) + if err != nil { + return nil, TextOutput{}, err + } + + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(httpReq) + if err != nil { + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: "❌ Cannot connect to cursor-adapter"}}, + IsError: true, + }, TextOutput{}, nil + } + defer resp.Body.Close() + + respBody, _ := io.ReadAll(resp.Body) + var data struct { + Data []struct { + ID string `json:"id"` + } `json:"data"` + } + if err := json.Unmarshal(respBody, &data); err 
!= nil { + return nil, TextOutput{Text: string(respBody)}, nil + } + + var lines []string + lines = append(lines, fmt.Sprintf("Available models (%d total):\n", len(data.Data))) + for _, m := range data.Data { + lines = append(lines, " "+m.ID) + } + text := strings.Join(lines, "\n") + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: text}}, + }, TextOutput{Text: text}, nil +} + +func checkHealth(ctx context.Context, _ *mcp.CallToolRequest, _ EmptyInput) (*mcp.CallToolResult, TextOutput, error) { + httpReq, err := http.NewRequestWithContext(ctx, "GET", adapterURL+"/health", nil) + if err != nil { + return nil, TextOutput{}, err + } + + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(httpReq) + if err != nil { + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: "❌ cursor-adapter is not running"}}, + IsError: true, + }, TextOutput{}, nil + } + defer resp.Body.Close() + + respBody, _ := io.ReadAll(resp.Body) + var pretty bytes.Buffer + text := string(respBody) + if err := json.Indent(&pretty, respBody, "", " "); err == nil { + text = pretty.String() + } + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: text}}, + }, TextOutput{Text: text}, nil +} + +func main() { + flag.Parse() + + if envURL := os.Getenv("CURSOR_ADAPTER_URL"); envURL != "" { + adapterURL = envURL + } + + server := mcp.NewServer( + &mcp.Implementation{ + Name: "cursor-bridge", + Version: "1.0.0", + }, + &mcp.ServerOptions{ + Instructions: "This server provides access to the Cursor AI coding agent via cursor-adapter. " + + "Use ask_cursor to delegate coding tasks, code generation, debugging, or technical questions to Cursor.", + }, + ) + + mcp.AddTool(server, &mcp.Tool{ + Name: "ask_cursor", + Description: "Ask the Cursor AI agent a question or delegate a coding task. " + + "Use this when you need code generation, review, debugging, or a second opinion. 
" + + "The Cursor agent acts as a pure reasoning engine. " + + "Available models: claude-opus-4-7-high (default), claude-opus-4-7-thinking-high, " + + "claude-4.6-opus-high, claude-4.6-sonnet-medium, gpt-5.4-medium, gemini-3.1-pro. " + + "Pass model name in the 'model' field.", + }, askCursor) + + mcp.AddTool(server, &mcp.Tool{ + Name: "list_cursor_models", + Description: "List all available models from the Cursor adapter.", + }, listModels) + + mcp.AddTool(server, &mcp.Tool{ + Name: "cursor_health", + Description: "Check the health status of the cursor-adapter service.", + }, checkHealth) + + if err := server.Run(context.Background(), &mcp.StdioTransport{}); err != nil { + log.Fatal(err) + } +} diff --git a/config.example.yaml b/config.example.yaml index 53d3cf4..dffb990 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -12,6 +12,24 @@ use_acp: false # Cursor agent to see the adapter's working directory. chat_only_workspace: true +# How to launch the Cursor CLI subprocess. +# plan (default): pass --mode plan; the CLI never executes tools, it +# just proposes plans. The proxy translates brain-side +# ... sentinels into real Anthropic +# tool_use blocks for the calling client to execute. +# agent: omit --mode and add --trust; the CLI runs in its native agent +# mode with full filesystem/shell tools and acts inside +# workspace_root. Use this when you want the CLI itself to be +# the executor (e.g. let it reorganise ~/Desktop directly). +cursor_mode: plan + +# Absolute directory the Cursor CLI subprocess runs in. Setting this +# disables the chat-only temp workspace isolation. Required when +# cursor_mode: agent if you want the CLI to act on a real folder. +# Per-request override: clients can send `X-Cursor-Workspace: /abs/path`. 
+# Example: workspace_root: /Users/daniel/Desktop +workspace_root: "" + log_level: INFO available_models: - claude-sonnet-4-20250514 diff --git a/config.yaml b/config.yaml index 9b0f4b4..d684b01 100644 --- a/config.yaml +++ b/config.yaml @@ -1,20 +1,110 @@ port: 8765 cursor_cli_path: agent -default_model: claude-sonnet-4-20250514 +default_model: claude-opus-4-7-high timeout: 300 max_concurrent: 5 use_acp: false +chat_only_workspace: true +log_level: ERROR -# Isolate Cursor CLI / ACP child in an empty temp workspace with -# HOME / CURSOR_CONFIG_DIR / XDG_CONFIG_HOME overridden so the agent can -# neither read the adapter's cwd nor load global rules from ~/.cursor. -# Recommended: true. Set to false only if you intentionally want the -# Cursor agent to see the adapter's working directory. -chat_only_workspace: false +system_prompt: | + You are the reasoning brain of a two-process AI system. The caller + (Claude Desktop / Claude Code / opencode) has the user's real machine, + filesystem, terminal, and any MCP-provided tools. You do not. You drive + it remotely by emitting tool invocations the proxy translates into real + Anthropic tool_use blocks for the caller to execute. + + Each turn you receive (a) the user's request, (b) the list of executors + the caller currently has under "Available executors", and (c) a + transcript of past tool_use / tool_result blocks. Decide what to do + next. + + CONTRACT — output exactly ONE of the following per turn: + + 1. A normal answer in plain markdown (when no tool is needed, or when + you are presenting the final result to the user). + + 2. EXACTLY one tool invocation, wrapped in sentinels with NOTHING else + before or after: + + + {"name":"","input": { ... }} + + + - "name" must match one of the executors listed under "Available + executors". If no executors are listed, fall back to a plain-text + answer. + - "input" must conform to that tool's input_schema. + - Do not add commentary around the block. 
The proxy will translate + it into a real tool_use call; the caller will run it and send you + a tool_result on the next turn. + + HARD RULES: + - NEVER claim you "cannot access the filesystem / terminal / browser", + "are in Ask/Plan mode", "are sandboxed", or "need the user to do it + manually". Those statements are false here — the caller is your + hands. + - NEVER fabricate tool names. If the tool you need is not listed, + explain in plain text what command/operation should be run and stop. + - Prefer a single tool_call per turn; iterate via the tool_result + feedback. Multi-step plans should be expressed as a sequence of + turns, not crammed into one block. + - Default target shell when emitting raw shell commands inside text: + zsh on macOS, unless the user says otherwise. + + VOCABULARY — when the user says any of: + "工作目錄" / "working directory" / "cwd" / "pwd" / "目前資料夾" / + "這個資料夾" / "the folder" / "the project" + they ALWAYS mean the folder the caller (Claude Desktop / Claude + Code / opencode) attached or opened for this session — i.e. the + host-mounted folder visible to the caller's executor (typically + under `/sessions//mnt/` or an absolute host path). They + NEVER mean the directory your own subprocess happens to be running + in, and they NEVER mean a sandbox path like `/sessions/.../mnt/` + with no folder under it. If you are tempted to call `pwd` and + answer with that, stop — the answer the user wants is the mount + root, found by listing `/sessions/*/mnt/*/` (see ORIENTATION + below) or by reading the "Known host-mount paths" section. + + ORIENTATION (first turn of a fresh session): + The caller's executor often runs inside a sandbox (e.g. Claude + Desktop's Cowork) that bind-mounts ONE folder the user attached for + this session. The folder's name is unknown to you in advance — it + could be Desktop, a project root, Documents, anything. 
From the + sandbox it shows up under `/sessions//mnt/`, and that + path IS the user's working folder for this conversation regardless of + its name. + + If the user refers to "my folder" / "the mounted folder" / "this + project" / "the desktop" / etc. and you have a shell-like executor + available but no path has been established yet (no `Working + directory:` line, no "Known host-mount paths" section, no prior + tool_result revealing one), your FIRST tool_call must be a single + discovery probe that enumerates every mount under `/sessions/*/mnt/`, + e.g.: + + + {"name":"","input":{"command":"pwd; ls -d /sessions/*/mnt/*/ 2>/dev/null; ls -la /workspace 2>/dev/null | head"}} + + + Treat whatever directory comes back under `/sessions/*/mnt/` as + THE working folder for this session, no matter what `` is. Then + use that path (or subpaths under it) for every subsequent tool_call. + Do NOT ask the user to name or re-state the folder — they already + attached it. The proxy also re-surfaces previously discovered mount + roots under "Known host-mount paths" on later turns; prefer those + over re-probing. 
-log_level: INFO available_models: - - claude-sonnet-4-20250514 - - claude-opus-4-20250514 + - claude-opus-4-7-high + - claude-opus-4-7-thinking-high + - claude-4.6-opus-high + - claude-4.6-opus-high-thinking + - claude-4.6-sonnet-medium + - claude-4.6-sonnet-medium-thinking + - claude-4.5-opus-high + - claude-4.5-sonnet + - claude-4-sonnet + - gpt-5.4-medium - gpt-5.2 - gemini-3.1-pro diff --git a/go.mod b/go.mod index 302d1a4..3b926e9 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,18 @@ go 1.26.1 require ( github.com/go-chi/chi/v5 v5.2.5 + github.com/modelcontextprotocol/go-sdk v1.5.0 github.com/spf13/cobra v1.10.2 gopkg.in/yaml.v3 v3.0.1 ) require ( + github.com/google/jsonschema-go v0.4.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect github.com/spf13/pflag v1.0.9 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.41.0 // indirect ) diff --git a/go.sum b/go.sum index 6196bb2..68f6f16 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,34 @@ github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug= github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/inconshreveable/mousetrap v1.1.0 
h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/modelcontextprotocol/go-sdk v1.5.0 h1:CHU0FIX9kpueNkxuYtfYQn1Z0slhFzBZuq+x6IiblIU= +github.com/modelcontextprotocol/go-sdk v1.5.0/go.mod h1:gggDIhoemhWs3BGkGwd1umzEXCEMMvAnhTrnbXJKKKA= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/bridge/bridge.go b/internal/bridge/bridge.go index 5411e45..d50741b 100644 --- a/internal/bridge/bridge.go +++ b/internal/bridge/bridge.go @@ -27,28 +27,86 @@ type Bridge interface { CheckHealth(ctx context.Context) error } -// NewBridge 建立 Bridge。chatOnly=true 會讓每個子程序都跑在空的 temp -// workspace、並且用 env overrides 把 HOME / CURSOR_CONFIG_DIR 導到那個 -// temp dir,讓 Cursor agent 讀不到任何真實專案檔案或全域 rules。 -func NewBridge(cursorPath string, logger *slog.Logger, useACP bool, chatOnly bool, maxConcurrent int, timeout time.Duration) Bridge { - if useACP { - return NewACPBridge(cursorPath, logger, chatOnly, maxConcurrent, timeout) +// Options bundles the knobs NewBridge needs. CursorPath, ChatOnly, Mode and +// WorkspaceRoot together decide how each subprocess is sandboxed and what +// `--mode` flag (if any) is passed. +type Options struct { + CursorPath string + Logger *slog.Logger + UseACP bool + ChatOnly bool + MaxConcurrent int + Timeout time.Duration + // Mode is "plan" (the CLI proposes only; caller executes via tool_use + // translation) or "agent" (the CLI executes natively in WorkspaceRoot). + // Empty defaults to "plan". + Mode string + // WorkspaceRoot, when non-empty, overrides ChatOnly's temp workspace + // and runs the CLI directly in this absolute directory. Per-request + // override via context (see WithWorkspaceOverride) takes precedence. + WorkspaceRoot string +} + +// NewBridge 建立 Bridge。 +func NewBridge(opts Options) Bridge { + if opts.UseACP { + return NewACPBridge(opts) } - return NewCLIBridge(cursorPath, chatOnly, maxConcurrent, timeout) + return NewCLIBridge(opts) +} + +// --- per-request workspace override via context --- + +type ctxKey int + +const workspaceCtxKey ctxKey = 1 + +// WithWorkspaceOverride attaches a per-request absolute workspace path to +// ctx. 
Bridges honour it ahead of the Options.WorkspaceRoot. +func WithWorkspaceOverride(ctx context.Context, workspace string) context.Context { + if workspace == "" { + return ctx + } + return context.WithValue(ctx, workspaceCtxKey, workspace) +} + +func workspaceOverride(ctx context.Context) string { + v, _ := ctx.Value(workspaceCtxKey).(string) + return v } // --- CLI Bridge --- type CLIBridge struct { - cursorPath string - semaphore chan struct{} - timeout time.Duration - chatOnly bool + cursorPath string + semaphore chan struct{} + timeout time.Duration + chatOnly bool + mode string + workspaceRoot string } -func buildCLICommandArgs(prompt, model, workspaceDir string, stream, chatOnly bool) []string { - args := []string{"--print", "--mode", "ask"} - if chatOnly { +func buildCLICommandArgs(prompt, model, workspaceDir, mode string, stream, chatOnly bool) []string { + args := []string{"--print"} + // "plan" (default): the CLI proposes plans without executing tools; + // the proxy translates a brain-side sentinel into real + // Anthropic tool_use blocks for the caller to execute. + // "agent": omit --mode to let the CLI run with full filesystem/shell + // tools — useful when the user wants the CLI itself to be the + // executor inside a real workspace dir. + switch mode { + case "agent": + // no --mode flag — agent mode is the CLI default + case "", "plan": + args = append(args, "--mode", "plan") + default: + args = append(args, "--mode", mode) + } + // --trust skips interactive permission prompts. We always want this + // non-interactively: chat-only mode is sandboxed anyway, and agent + // mode against a real WorkspaceRoot means the operator already + // opted in to letting the CLI execute there. 
+ if chatOnly || mode == "agent" { args = append(args, "--trust") } if workspaceDir != "" { @@ -66,23 +124,40 @@ func buildCLICommandArgs(prompt, model, workspaceDir string, stream, chatOnly bo return args } -func NewCLIBridge(cursorPath string, chatOnly bool, maxConcurrent int, timeout time.Duration) *CLIBridge { - if maxConcurrent <= 0 { - maxConcurrent = 1 +// NewCLIBridge constructs a CLIBridge from an Options struct. ChatOnly, +// Mode and WorkspaceRoot together decide how each subprocess is sandboxed. +func NewCLIBridge(opts Options) *CLIBridge { + if opts.MaxConcurrent <= 0 { + opts.MaxConcurrent = 1 } return &CLIBridge{ - cursorPath: cursorPath, - semaphore: make(chan struct{}, maxConcurrent), - timeout: timeout, - chatOnly: chatOnly, + cursorPath: opts.CursorPath, + semaphore: make(chan struct{}, opts.MaxConcurrent), + timeout: opts.Timeout, + chatOnly: opts.ChatOnly, + mode: opts.Mode, + workspaceRoot: opts.WorkspaceRoot, } } -// prepareWorkspace returns (workspaceDir, envOverrides, cleanup). When -// chatOnly is enabled, workspaceDir is a fresh temp dir and cleanup removes -// it. Otherwise workspaceDir falls back to the adapter's cwd with no -// cleanup. -func (b *CLIBridge) prepareWorkspace() (string, map[string]string, func()) { +// prepareWorkspace returns (workspaceDir, envOverrides, cleanup). +// +// Resolution order: +// 1. ctx override (X-Cursor-Workspace header) if set +// 2. configured WorkspaceRoot if set +// 3. chat-only temp dir if enabled +// 4. adapter's cwd +// +// Cases (1) and (2) deliberately return no env overrides — the caller +// asked for a real host directory, so HOME / CURSOR_CONFIG_DIR stay +// untouched and the CLI sees the real user profile (auth + tools). 
+func (b *CLIBridge) prepareWorkspace(ctx context.Context) (string, map[string]string, func()) { + if override := workspaceOverride(ctx); override != "" { + return override, nil, func() {} + } + if b.workspaceRoot != "" { + return b.workspaceRoot, nil, func() {} + } if !b.chatOnly { ws, _ := os.Getwd() return ws, nil, func() {} @@ -115,9 +190,9 @@ func (b *CLIBridge) Execute(ctx context.Context, prompt string, model string, se execCtx, cancel := context.WithTimeout(ctx, b.timeout) defer cancel() - ws, envOverrides, cleanup := b.prepareWorkspace() + ws, envOverrides, cleanup := b.prepareWorkspace(ctx) defer cleanup() - cmd := exec.CommandContext(execCtx, b.cursorPath, buildCLICommandArgs(prompt, model, ws, true, b.chatOnly)...) + cmd := exec.CommandContext(execCtx, b.cursorPath, buildCLICommandArgs(prompt, model, ws, b.mode, true, b.chatOnly)...) cmd.Dir = ws cmd.Env = workspace.MergeEnv(os.Environ(), envOverrides) @@ -165,9 +240,9 @@ func (b *CLIBridge) ExecuteSync(ctx context.Context, prompt string, model string execCtx, cancel := context.WithTimeout(ctx, b.timeout) defer cancel() - ws, envOverrides, cleanup := b.prepareWorkspace() + ws, envOverrides, cleanup := b.prepareWorkspace(ctx) defer cleanup() - cmd := exec.CommandContext(execCtx, b.cursorPath, buildCLICommandArgs(prompt, model, ws, false, b.chatOnly)...) + cmd := exec.CommandContext(execCtx, b.cursorPath, buildCLICommandArgs(prompt, model, ws, b.mode, false, b.chatOnly)...) 
cmd.Dir = ws cmd.Env = workspace.MergeEnv(os.Environ(), envOverrides) var stdout, stderr bytes.Buffer @@ -233,15 +308,16 @@ func parseModelsOutput(output string) []string { // --- ACP Bridge (per-request 完整流程,參考 cursor-api-proxy) --- type ACPBridge struct { - cursorPath string - logger *slog.Logger - timeout time.Duration - chatOnly bool - workers []*acpWorker - nextWorker atomic.Uint32 - sessionsMu sync.Mutex - sessions map[string]acpSessionHandle - sessionTTL time.Duration + cursorPath string + logger *slog.Logger + timeout time.Duration + chatOnly bool + workspaceRoot string + workers []*acpWorker + nextWorker atomic.Uint32 + sessionsMu sync.Mutex + sessions map[string]acpSessionHandle + sessionTTL time.Duration } type acpSessionHandle struct { @@ -252,20 +328,21 @@ type acpSessionHandle struct { LastUsedAt time.Time } -func NewACPBridge(cursorPath string, logger *slog.Logger, chatOnly bool, maxConcurrent int, timeout time.Duration) *ACPBridge { - if maxConcurrent <= 0 { - maxConcurrent = 1 +func NewACPBridge(opts Options) *ACPBridge { + if opts.MaxConcurrent <= 0 { + opts.MaxConcurrent = 1 } bridge := &ACPBridge{ - cursorPath: cursorPath, - logger: logger, - timeout: timeout, - chatOnly: chatOnly, - sessions: make(map[string]acpSessionHandle), - sessionTTL: 30 * time.Minute, + cursorPath: opts.CursorPath, + logger: opts.Logger, + timeout: opts.Timeout, + chatOnly: opts.ChatOnly, + workspaceRoot: opts.WorkspaceRoot, + sessions: make(map[string]acpSessionHandle), + sessionTTL: 30 * time.Minute, } - for i := 0; i < maxConcurrent; i++ { - bridge.workers = append(bridge.workers, newACPWorker(cursorPath, logger, chatOnly, timeout)) + for i := 0; i < opts.MaxConcurrent; i++ { + bridge.workers = append(bridge.workers, newACPWorker(opts.CursorPath, opts.Logger, opts.ChatOnly, opts.WorkspaceRoot, opts.Timeout)) } return bridge } @@ -304,10 +381,11 @@ type acpResponse struct { } type acpWorker struct { - cursorPath string - logger *slog.Logger - timeout time.Duration - 
chatOnly bool + cursorPath string + logger *slog.Logger + timeout time.Duration + chatOnly bool + workspaceRoot string reqMu sync.Mutex @@ -327,12 +405,13 @@ type acpWorker struct { generation atomic.Uint64 } -func newACPWorker(cursorPath string, logger *slog.Logger, chatOnly bool, timeout time.Duration) *acpWorker { +func newACPWorker(cursorPath string, logger *slog.Logger, chatOnly bool, workspaceRoot string, timeout time.Duration) *acpWorker { return &acpWorker{ - cursorPath: cursorPath, - logger: logger, - timeout: timeout, - chatOnly: chatOnly, + cursorPath: cursorPath, + logger: logger, + timeout: timeout, + chatOnly: chatOnly, + workspaceRoot: workspaceRoot, } } @@ -380,7 +459,7 @@ func (b *ACPBridge) ExecuteSync(ctx context.Context, prompt string, model string func (b *ACPBridge) pickWorker() *acpWorker { if len(b.workers) == 0 { - return newACPWorker(b.cursorPath, b.logger, b.chatOnly, b.timeout) + return newACPWorker(b.cursorPath, b.logger, b.chatOnly, b.workspaceRoot, b.timeout) } idx := int(b.nextWorker.Add(1)-1) % len(b.workers) return b.workers[idx] @@ -613,12 +692,15 @@ func (w *acpWorker) ensureStartedLocked(ctx context.Context, model string) error env map[string]string err error ) - if w.chatOnly { + switch { + case w.workspaceRoot != "": + dir = w.workspaceRoot + case w.chatOnly: dir, env, err = workspace.ChatOnly("") if err != nil { return fmt.Errorf("chat-only workspace: %w", err) } - } else { + default: dir, err = os.MkdirTemp("", "cursor-acp-worker-*") if err != nil { return fmt.Errorf("temp workspace: %w", err) @@ -923,7 +1005,11 @@ func (w *acpWorker) resetLocked() { } if w.workspace != "" { - _ = os.RemoveAll(w.workspace) + // Only remove temp / chat-only directories — never delete a + // configured WorkspaceRoot (that's a real user directory). 
+ if w.workspace != w.workspaceRoot { + _ = os.RemoveAll(w.workspace) + } w.workspace = "" } w.envOverrides = nil diff --git a/internal/bridge/bridge_test.go b/internal/bridge/bridge_test.go index 58795ca..e635a8d 100644 --- a/internal/bridge/bridge_test.go +++ b/internal/bridge/bridge_test.go @@ -11,8 +11,12 @@ import ( "time" ) +func cliOpts(path string, chatOnly bool, max int, timeout time.Duration) Options { + return Options{CursorPath: path, ChatOnly: chatOnly, MaxConcurrent: max, Timeout: timeout} +} + func TestNewBridge(t *testing.T) { - b := NewCLIBridge("/usr/bin/agent", false, 4, 30*time.Second) + b := NewCLIBridge(cliOpts("/usr/bin/agent", false, 4, 30*time.Second)) if b == nil { t.Fatal("NewCLIBridge returned nil") } @@ -28,14 +32,14 @@ func TestNewBridge(t *testing.T) { } func TestNewBridge_DefaultConcurrency(t *testing.T) { - b := NewCLIBridge("agent", false, 0, 10*time.Second) + b := NewCLIBridge(cliOpts("agent", false, 0, 10*time.Second)) if cap(b.semaphore) != 1 { t.Errorf("semaphore capacity = %d, want 1 (default)", cap(b.semaphore)) } } func TestNewBridge_NegativeConcurrency(t *testing.T) { - b := NewCLIBridge("agent", false, -5, 10*time.Second) + b := NewCLIBridge(cliOpts("agent", false, -5, 10*time.Second)) if cap(b.semaphore) != 1 { t.Errorf("semaphore capacity = %d, want 1 (default for negative)", cap(b.semaphore)) } @@ -43,7 +47,7 @@ func TestNewBridge_NegativeConcurrency(t *testing.T) { func TestNewBridge_UsesACPWhenRequested(t *testing.T) { logger := slog.New(slog.NewTextHandler(io.Discard, nil)) - b := NewBridge("agent", logger, true, false, 2, 10*time.Second) + b := NewBridge(Options{CursorPath: "agent", Logger: logger, UseACP: true, MaxConcurrent: 2, Timeout: 10 * time.Second}) if _, ok := b.(*ACPBridge); !ok { t.Fatalf("expected ACPBridge, got %T", b) } @@ -75,11 +79,11 @@ func TestBuildACPCommandArgs_WithModel(t *testing.T) { } } -func TestBuildCLICommandArgs_UsesAskMode(t *testing.T) { - got := buildCLICommandArgs("hello", "auto", 
"/tmp/workspace", true, false) +func TestBuildCLICommandArgs_PlanMode(t *testing.T) { + got := buildCLICommandArgs("hello", "auto", "/tmp/workspace", "plan", true, false) wantPrefix := []string{ "--print", - "--mode", "ask", + "--mode", "plan", "--workspace", "/tmp/workspace", "--model", "auto", "--stream-partial-output", "--output-format", "stream-json", @@ -98,7 +102,7 @@ func TestBuildCLICommandArgs_UsesAskMode(t *testing.T) { } func TestBuildCLICommandArgs_ChatOnlyAddsTrust(t *testing.T) { - got := buildCLICommandArgs("hi", "", "/tmp/ws", false, true) + got := buildCLICommandArgs("hi", "", "/tmp/ws", "plan", false, true) found := false for _, a := range got { if a == "--trust" { @@ -111,16 +115,35 @@ func TestBuildCLICommandArgs_ChatOnlyAddsTrust(t *testing.T) { } } -// mockCmdHelper builds a bridge that executes a fake command for channel logic testing. +func TestBuildCLICommandArgs_AgentModeOmitsModeFlagAndAddsTrust(t *testing.T) { + got := buildCLICommandArgs("hi", "", "/Users/me/Desktop", "agent", false, false) + for _, a := range got { + if a == "--mode" { + t.Fatalf("agent mode should not emit --mode flag, args: %v", got) + } + } + hasTrust := false + for _, a := range got { + if a == "--trust" { + hasTrust = true + break + } + } + if !hasTrust { + t.Fatalf("agent mode should imply --trust, args: %v", got) + } +} + +// mockCmdBridge builds a bridge that executes a fake command for channel logic testing. 
+// +//nolint:unused func mockCmdBridge(t *testing.T) *CLIBridge { t.Helper() - // Use "echo" as a mock command that outputs valid JSON lines - // We'll override Execute logic by using a custom cursorPath that is "echo" - return NewCLIBridge("echo", false, 2, 5*time.Second) + return NewCLIBridge(cliOpts("echo", false, 2, 5*time.Second)) } func TestExecute_ContextCancelled(t *testing.T) { - b := NewCLIBridge("/bin/sleep", false, 1, 30*time.Second) + b := NewCLIBridge(cliOpts("/bin/sleep", false, 1, 30*time.Second)) ctx, cancel := context.WithCancel(context.Background()) cancel() // cancel immediately @@ -148,7 +171,7 @@ func TestExecute_ContextCancelled(t *testing.T) { } func TestExecute_SemaphoreBlocking(t *testing.T) { - b := NewCLIBridge("/bin/sleep", false, 1, 30*time.Second) + b := NewCLIBridge(cliOpts("/bin/sleep", false, 1, 30*time.Second)) // Fill the semaphore b.semaphore <- struct{}{} @@ -173,7 +196,7 @@ func TestExecute_SemaphoreBlocking(t *testing.T) { } func TestExecute_InvalidCommand(t *testing.T) { - b := NewCLIBridge("/nonexistent/command", false, 1, 5*time.Second) + b := NewCLIBridge(cliOpts("/nonexistent/command", false, 1, 5*time.Second)) ctx := context.Background() outputChan, errChan := b.Execute(ctx, "test", "model", "") @@ -196,7 +219,7 @@ func TestExecute_InvalidCommand(t *testing.T) { func TestExecute_ValidJSONOutput(t *testing.T) { // Use "printf" to simulate JSON line output - b := NewCLIBridge("printf", false, 2, 5*time.Second) + b := NewCLIBridge(cliOpts("printf", false, 2, 5*time.Second)) ctx := context.Background() // printf with JSON lines diff --git a/internal/config/config.go b/internal/config/config.go index 092095b..af08f59 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -18,6 +18,28 @@ type Config struct { ChatOnlyWorkspace bool `yaml:"chat_only_workspace"` LogLevel string `yaml:"log_level"` AvailableModels []string `yaml:"available_models,omitempty"` + SystemPrompt string `yaml:"system_prompt"` + + // 
CursorMode controls how the Cursor CLI subprocess is launched. + // "plan" (default): pass `--mode plan`. The CLI never executes + // tools; it only proposes plans. Combined with brain + // SystemPrompt + sentinel translation, the + // caller (Claude Desktop) is the executor. + // "agent": omit `--mode`, letting Cursor CLI use its native agent + // mode with full filesystem/shell tools. The CLI itself + // becomes the executor and acts inside WorkspaceRoot. + CursorMode string `yaml:"cursor_mode"` + + // WorkspaceRoot, when non-empty, is the absolute directory the Cursor + // CLI subprocess runs in (and treats as its project root). Setting + // this disables the chat-only temp workspace isolation. Useful when + // you want the CLI to actually edit files on the host (e.g. set to + // /Users//Desktop and use cursor_mode: agent to let it + // reorganise that folder directly). + // + // Per-request override: clients may send `X-Cursor-Workspace: /abs/path` + // to switch the working directory just for that call. + WorkspaceRoot string `yaml:"workspace_root"` } // Defaults returns a Config populated with default values. @@ -38,9 +60,91 @@ func Defaults() Config { UseACP: false, ChatOnlyWorkspace: true, LogLevel: "INFO", + SystemPrompt: DefaultSystemPrompt, + CursorMode: "plan", + WorkspaceRoot: "", } } +// DefaultSystemPrompt is prepended to every prompt sent to the Cursor CLI. +// It puts the model in "remote brain" mode: it never executes anything +// directly; instead it either answers in plain text or emits a single +// {...} sentinel that the proxy translates into a +// real Anthropic tool_use block for the caller (Claude Desktop / Claude +// Code / opencode) to execute. The caller's tool_result comes back as +// transcript on the next turn. +const DefaultSystemPrompt = `You are the reasoning brain of a two-process AI system. 
` + + `The caller (Claude Desktop / Claude Code / opencode) has the user's real machine, ` + + `filesystem, terminal, and any MCP-provided tools. You do not. You drive it remotely ` + + `by emitting tool invocations the proxy translates into real Anthropic tool_use blocks ` + + `for the caller to execute. + +Each turn you receive (a) the user's request, (b) the list of executors the caller ` + + `currently has under "Available executors", and (c) a transcript of past tool_use / ` + + `tool_result blocks. + +CONTRACT — output exactly ONE of: + +1. A normal answer in plain markdown (when no tool is needed, or when you are ` + + `presenting the final result to the user). + +2. EXACTLY one tool invocation, wrapped in sentinels with NOTHING else around it: + + + {"name":"","input": { ... }} + + + "name" must match an executor listed under "Available executors"; "input" must ` + + `conform to that tool's input_schema. + +HARD RULES: +- NEVER claim you "cannot access the filesystem / terminal / browser", "are in Ask ` + + `or Plan mode", "are sandboxed", or "need the user to do it manually". Those ` + + `statements are false — the caller is your hands. +- NEVER fabricate tool names. If the tool you need is not in the list, explain in ` + + `plain text what should be run and stop. +- Prefer a single tool_call per turn; iterate via tool_result feedback. +- Default shell when emitting raw commands as text: zsh on macOS. + +VOCABULARY — when the user says any of: + "工作目錄" / "working directory" / "cwd" / "pwd" / "目前資料夾" / + "這個資料夾" / "the folder" / "the project" +they ALWAYS mean the folder the caller (Claude Desktop / Claude Code / opencode) ` + + `attached or opened for this session — i.e. the host-mounted folder visible ` + + `to the caller's executor (typically under ` + "`/sessions//mnt/`" + ` or ` + + `an absolute host path). 
They NEVER mean the directory your own subprocess ` + + `happens to be running in, and they NEVER mean a sandbox path like ` + + "`/sessions/.../mnt/`" + ` with no folder under it. If you are tempted to call ` + + "`pwd`" + ` and answer with that, stop — the answer the user wants is the ` + + `mount root, which is found by listing ` + "`/sessions/*/mnt/*/`" + ` (see ` + + `ORIENTATION below) or by reading the "Known host-mount paths" section. + +ORIENTATION (first turn of a fresh session): +The caller's executor often runs inside a sandbox (e.g. Claude Desktop's ` + + `Cowork) that bind-mounts ONE folder the user attached for this session. ` + + `The folder's name is unknown to you in advance — it could be Desktop, a ` + + `project root, Documents, anything. From the sandbox it shows up under ` + + "`/sessions//mnt/`" + `, and that path IS the user's working ` + + `folder for this conversation regardless of its name. + +If the user refers to "my folder" / "the mounted folder" / "this project" / ` + + `"the desktop" / etc. and you have a shell-like executor available but no ` + + `path has been established yet (no ` + "`Working directory:`" + ` line, no ` + + `"Known host-mount paths" section, no prior tool_result revealing one), ` + + `your FIRST tool_call must be a single discovery probe that enumerates ` + + `every mount under ` + "`/sessions/*/mnt/`" + `, e.g.: + + + {"name":"","input":{"command":"pwd; ls -d /sessions/*/mnt/*/ 2>/dev/null; ls -la /workspace 2>/dev/null | head"}} + + +Treat whatever directory comes back under ` + "`/sessions/*/mnt/`" + ` as ` + + `THE working folder for this session, no matter what ` + "``" + ` is. ` + + `Then use that path (or subpaths under it) for every subsequent tool_call. ` + + `Do NOT ask the user to name or re-state the folder — they already attached ` + + `it. 
The proxy also re-surfaces previously discovered mount roots under ` + + `"Known host-mount paths" on later turns; prefer those over re-probing.` + // Load reads a YAML config file from path. If path is empty it defaults to // ~/.cursor-adapter/config.yaml. When the file does not exist, a config with // default values is returned without an error. @@ -84,5 +188,15 @@ func (c *Config) validate() error { if c.Timeout <= 0 { return fmt.Errorf("timeout must be > 0, got %d", c.Timeout) } + switch c.CursorMode { + case "", "plan", "agent": + default: + return fmt.Errorf("cursor_mode must be \"plan\" or \"agent\", got %q", c.CursorMode) + } + if c.WorkspaceRoot != "" { + if !filepath.IsAbs(c.WorkspaceRoot) { + return fmt.Errorf("workspace_root must be an absolute path, got %q", c.WorkspaceRoot) + } + } return nil } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 18e1197..30671ea 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -66,6 +66,9 @@ available_models: ChatOnlyWorkspace: true, LogLevel: "DEBUG", AvailableModels: []string{"gpt-5.2", "claude-sonnet-4-20250514"}, + SystemPrompt: DefaultSystemPrompt, + CursorMode: "plan", + WorkspaceRoot: "", } if !reflect.DeepEqual(*cfg, want) { diff --git a/internal/converter/modelmap.go b/internal/converter/modelmap.go index 01e0ce2..94937d8 100644 --- a/internal/converter/modelmap.go +++ b/internal/converter/modelmap.go @@ -6,6 +6,15 @@ import "strings" // Allows users to configure friendly names in OpenCode instead of memorising // exact Cursor IDs like "claude-4.6-sonnet-medium". 
var shortAlias = map[string]string{ + // Claude 4.7 + "opus-4.7": "claude-opus-4-7-high", + "opus-4.7-thinking": "claude-opus-4-7-thinking-high", + "opus-4.7-low": "claude-opus-4-7-low", + "opus-4.7-medium": "claude-opus-4-7-medium", + "opus-4.7-high": "claude-opus-4-7-high", + "opus-4.7-xhigh": "claude-opus-4-7-xhigh", + "opus-4.7-max": "claude-opus-4-7-max", + // Claude 4.6 "sonnet-4.6": "claude-4.6-sonnet-medium", "sonnet-4.6-thinking": "claude-4.6-sonnet-medium-thinking", @@ -23,21 +32,29 @@ var shortAlias = map[string]string{ "sonnet-4": "claude-4-sonnet", "sonnet-4-thinking": "claude-4-sonnet-thinking", - // Anthropic API-style names - "claude-opus-4-6": "claude-4.6-opus-high", - "claude-opus-4.6": "claude-4.6-opus-high", - "claude-sonnet-4-6": "claude-4.6-sonnet-medium", - "claude-sonnet-4.6": "claude-4.6-sonnet-medium", - "claude-opus-4-5": "claude-4.5-opus-high", - "claude-opus-4.5": "claude-4.5-opus-high", - "claude-sonnet-4-5": "claude-4.5-sonnet", - "claude-sonnet-4.5": "claude-4.5-sonnet", - "claude-sonnet-4": "claude-4-sonnet", - "claude-opus-4-6-thinking": "claude-4.6-opus-high-thinking", - "claude-sonnet-4-6-thinking": "claude-4.6-sonnet-medium-thinking", - "claude-opus-4-5-thinking": "claude-4.5-opus-high-thinking", - "claude-sonnet-4-5-thinking": "claude-4.5-sonnet-thinking", - "claude-sonnet-4-thinking": "claude-4-sonnet-thinking", + // Anthropic API-style names → Cursor IDs + // Claude 4.7 + "claude-opus-4-7": "claude-opus-4-7-high", + "claude-opus-4.7": "claude-opus-4-7-high", + "claude-opus-4-7-thinking": "claude-opus-4-7-thinking-high", + "claude-opus-4.7-thinking": "claude-opus-4-7-thinking-high", + // Claude 4.6 + "claude-opus-4-6": "claude-4.6-opus-high", + "claude-opus-4.6": "claude-4.6-opus-high", + "claude-sonnet-4-6": "claude-4.6-sonnet-medium", + "claude-sonnet-4.6": "claude-4.6-sonnet-medium", + "claude-opus-4-6-thinking": "claude-4.6-opus-high-thinking", + "claude-sonnet-4-6-thinking": "claude-4.6-sonnet-medium-thinking", + // 
Claude 4.5 + "claude-opus-4-5": "claude-4.5-opus-high", + "claude-opus-4.5": "claude-4.5-opus-high", + "claude-sonnet-4-5": "claude-4.5-sonnet", + "claude-sonnet-4.5": "claude-4.5-sonnet", + "claude-opus-4-5-thinking": "claude-4.5-opus-high-thinking", + "claude-sonnet-4-5-thinking": "claude-4.5-sonnet-thinking", + // Claude 4 + "claude-sonnet-4": "claude-4-sonnet", + "claude-sonnet-4-thinking": "claude-4-sonnet-thinking", // Old Anthropic date-based names "claude-sonnet-4-20250514": "claude-4-sonnet", @@ -71,16 +88,32 @@ type aliasEntry struct { } var reverseAliases = []aliasEntry{ - {"claude-4.6-opus-high", "claude-opus-4-6", "Claude 4.6 Opus"}, - {"claude-4.6-opus-high-thinking", "claude-opus-4-6-thinking", "Claude 4.6 Opus (Thinking)"}, - {"claude-4.6-sonnet-medium", "claude-sonnet-4-6", "Claude 4.6 Sonnet"}, - {"claude-4.6-sonnet-medium-thinking", "claude-sonnet-4-6-thinking", "Claude 4.6 Sonnet (Thinking)"}, - {"claude-4.5-opus-high", "claude-opus-4-5", "Claude 4.5 Opus"}, - {"claude-4.5-opus-high-thinking", "claude-opus-4-5-thinking", "Claude 4.5 Opus (Thinking)"}, - {"claude-4.5-sonnet", "claude-sonnet-4-5", "Claude 4.5 Sonnet"}, - {"claude-4.5-sonnet-thinking", "claude-sonnet-4-5-thinking", "Claude 4.5 Sonnet (Thinking)"}, - {"claude-4-sonnet", "claude-sonnet-4", "Claude 4 Sonnet"}, - {"claude-4-sonnet-thinking", "claude-sonnet-4-thinking", "Claude 4 Sonnet (Thinking)"}, + // Claude 4.7 — Cursor uses "claude-opus-4-7-*" natively, add friendly aliases + {"claude-opus-4-7-low", "claude-opus-4.7-low", "Claude Opus 4.7 (Low)"}, + {"claude-opus-4-7-medium", "claude-opus-4.7-medium", "Claude Opus 4.7 (Medium)"}, + {"claude-opus-4-7-high", "claude-opus-4.7-high", "Claude Opus 4.7"}, + {"claude-opus-4-7-xhigh", "claude-opus-4.7-xhigh", "Claude Opus 4.7 (XHigh)"}, + {"claude-opus-4-7-max", "claude-opus-4.7-max", "Claude Opus 4.7 (Max)"}, + {"claude-opus-4-7-thinking-low", "claude-opus-4.7-thinking-low", "Claude Opus 4.7 Thinking (Low)"}, + 
{"claude-opus-4-7-thinking-medium", "claude-opus-4.7-thinking-medium", "Claude Opus 4.7 Thinking (Medium)"}, + {"claude-opus-4-7-thinking-high", "claude-opus-4.7-thinking-high", "Claude Opus 4.7 Thinking"}, + {"claude-opus-4-7-thinking-xhigh", "claude-opus-4.7-thinking-xhigh", "Claude Opus 4.7 Thinking (XHigh)"}, + {"claude-opus-4-7-thinking-max", "claude-opus-4.7-thinking-max", "Claude Opus 4.7 Thinking (Max)"}, + // Claude 4.6 + {"claude-4.6-opus-high", "claude-opus-4-6", "Claude Opus 4.6"}, + {"claude-4.6-opus-high-thinking", "claude-opus-4-6-thinking", "Claude Opus 4.6 (Thinking)"}, + {"claude-4.6-opus-max", "claude-opus-4-6-max", "Claude Opus 4.6 (Max)"}, + {"claude-4.6-opus-max-thinking", "claude-opus-4-6-max-thinking", "Claude Opus 4.6 Max (Thinking)"}, + {"claude-4.6-sonnet-medium", "claude-sonnet-4-6", "Claude Sonnet 4.6"}, + {"claude-4.6-sonnet-medium-thinking", "claude-sonnet-4-6-thinking", "Claude Sonnet 4.6 (Thinking)"}, + // Claude 4.5 + {"claude-4.5-opus-high", "claude-opus-4-5", "Claude Opus 4.5"}, + {"claude-4.5-opus-high-thinking", "claude-opus-4-5-thinking", "Claude Opus 4.5 (Thinking)"}, + {"claude-4.5-sonnet", "claude-sonnet-4-5", "Claude Sonnet 4.5"}, + {"claude-4.5-sonnet-thinking", "claude-sonnet-4-5-thinking", "Claude Sonnet 4.5 (Thinking)"}, + // Claude 4 + {"claude-4-sonnet", "claude-sonnet-4", "Claude Sonnet 4"}, + {"claude-4-sonnet-thinking", "claude-sonnet-4-thinking", "Claude Sonnet 4 (Thinking)"}, } // GetAnthropicModelAliases returns alias entries for models available in Cursor, diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 410aeb9..3bc7dc9 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -23,6 +23,25 @@ var rules = []rule{ {regexp.MustCompile(`(?i)\bcc_version=[^\s;,\n]+[;,]?\s*`), ""}, {regexp.MustCompile(`(?i)\bcc_entrypoint=[^\s;,\n]+[;,]?\s*`), ""}, {regexp.MustCompile(`(?i)\bcch=[a-f0-9]+[;,]?\s*`), ""}, + + // --- Sandbox / capability limitation stripping --- 
+ // Claude Desktop's system prompt tells the model it's in a sandbox, + // cannot access the filesystem, is in "Ask mode" / "Cowork mode", etc. + // These phrases cause the model to refuse helpful responses. We strip + // them so the model still sees tool definitions but not the restrictions. + + // "you cannot access ...", "you do not have access to ...", etc. + {regexp.MustCompile(`(?i)[^\n]*(?:you (?:cannot|can ?not|do not|don[''\x{2019}]t|are unable to) (?:access|read|write|modify|execute|run|create|delete|move|open))[^\n]*\n?`), ""}, + // "you are in a sandboxed environment", "running in a sandbox", etc. + {regexp.MustCompile(`(?i)[^\n]*(?:sandbox(?:ed)?|isolated) (?:environment|mode|context)[^\n]*\n?`), ""}, + // "you are in Ask mode" / "Cowork mode" / "read-only mode" + {regexp.MustCompile(`(?i)[^\n]*(?:Ask mode|Cowork(?:er)? mode|read[- ]only mode)[^\n]*\n?`), ""}, + // "you don't have filesystem access" / "no filesystem access" + {regexp.MustCompile(`(?i)[^\n]*(?:no|without|lack(?:s|ing)?|limited) (?:file ?system|file|terminal|shell|command[- ]line) access[^\n]*\n?`), ""}, + // "you cannot run commands on the user's machine" + {regexp.MustCompile(`(?i)[^\n]*cannot (?:run|execute) (?:commands?|scripts?|code) (?:on|in)[^\n]*\n?`), ""}, + + // --- Branding replacement --- // Replace "Claude Code" product name with "Cursor" (case-sensitive on purpose). {regexp.MustCompile(`\bClaude Code\b`), "Cursor"}, // Replace full Anthropic CLI description. Handle both straight and curly apostrophes. 
diff --git a/internal/server/anthropic.go b/internal/server/anthropic.go index 43efdec..8b64878 100644 --- a/internal/server/anthropic.go +++ b/internal/server/anthropic.go @@ -3,168 +3,188 @@ package server import ( "encoding/json" "fmt" + "regexp" "strings" "github.com/daniel/cursor-adapter/internal/sanitize" "github.com/daniel/cursor-adapter/internal/types" ) -// buildPromptFromAnthropicMessages flattens an Anthropic Messages request into -// a single prompt string suitable for `agent --print`. It: -// - renders tool_use / tool_result blocks as readable pseudo-XML so the -// model can follow the trajectory of previous tool calls -// - embeds the `tools` schema as part of the System block via -// toolsToSystemText, so the model knows what tools the outer agent (e.g. -// Claude Code) has available -// - runs every piece of free text through sanitize.Text to strip Claude Code -// branding and telemetry headers that would confuse the Cursor agent -func buildPromptFromAnthropicMessages(req types.AnthropicMessagesRequest) string { - var systemParts []string - for _, block := range req.System { - if block.Type == "text" && strings.TrimSpace(block.Text) != "" { - systemParts = append(systemParts, sanitize.Text(block.Text)) - } - } - if tools := toolsToSystemText(req.Tools); tools != "" { - systemParts = append(systemParts, tools) +// systemReminderRe matches ... blocks +// that Claude Desktop embeds inside user messages. +var systemReminderRe = regexp.MustCompile(`(?s).*?\s*`) + +// buildPromptFromAnthropicMessages flattens an Anthropic Messages request +// into a single prompt string suitable for `agent --print`. +// +// "Pure brain + remote executors" design: +// - DROP all client system messages (mode descriptions / sandbox warnings +// that make the model refuse). +// - USE ONLY the adapter's injected system prompt. +// - RENDER req.Tools as a plain-text inventory of executors that the +// caller (Claude Desktop / Claude Code / opencode) owns. 
The brain must +// know it has remote hands. +// - RENDER assistant tool_use and user tool_result blocks as readable +// transcript, so multi-turn ReAct loops keep working. +// - STRIP blocks embedded in user messages. +func buildPromptFromAnthropicMessages(req types.AnthropicMessagesRequest, injectedSystemPrompt string) string { + var prompt strings.Builder + + if injectedSystemPrompt != "" { + prompt.WriteString("System:\n") + prompt.WriteString(injectedSystemPrompt) + prompt.WriteString("\n\n") + } + + if hints := renderMountHints(extractMountHints(req)); hints != "" { + prompt.WriteString(hints) + prompt.WriteString("\n") + } + + if toolsBlock := renderToolsForBrain(req.Tools); toolsBlock != "" { + prompt.WriteString(toolsBlock) + prompt.WriteString("\n") } - var convo []string for _, msg := range req.Messages { - text := anthropicContentToText(msg.Content) + text := renderMessageBlocks(msg.Role, msg.Content) if text == "" { continue } switch msg.Role { case "assistant": - convo = append(convo, "Assistant: "+text) + prompt.WriteString("Assistant: ") default: - convo = append(convo, "User: "+text) + prompt.WriteString("User: ") } - } - - var prompt strings.Builder - if len(systemParts) > 0 { - prompt.WriteString("System:\n") - prompt.WriteString(strings.Join(systemParts, "\n\n")) + prompt.WriteString(text) prompt.WriteString("\n\n") } - prompt.WriteString(strings.Join(convo, "\n\n")) - prompt.WriteString("\n\nAssistant:") + + prompt.WriteString("Assistant:") return prompt.String() } -// anthropicContentToText renders a single message's content blocks as a -// single string. Unlike the old implementation, this one preserves tool_use -// and tool_result blocks so the model sees the full conversation trajectory -// rather than mysterious gaps. -func anthropicContentToText(content types.AnthropicContent) string { +// renderToolsForBrain converts the Anthropic tools[] array into a readable +// inventory the brain can reason about. 
The brain is told it MUST emit +// {...} sentinels when it wants to invoke one; the +// proxy translates that into real Anthropic tool_use blocks for the caller. +func renderToolsForBrain(tools []types.AnthropicTool) string { + if len(tools) == 0 { + return "" + } + var b strings.Builder + b.WriteString("Available executors (the caller will run these for you):\n") + for _, t := range tools { + b.WriteString("- ") + b.WriteString(t.Name) + if desc := strings.TrimSpace(t.Description); desc != "" { + b.WriteString(": ") + b.WriteString(singleLine(desc)) + } + if len(t.InputSchema) > 0 { + b.WriteString("\n input_schema: ") + b.WriteString(compactJSON(t.InputSchema)) + } + b.WriteString("\n") + } + b.WriteString("\nTo invoke a tool, output EXACTLY one fenced block (and nothing else for that turn):\n") + b.WriteString("\n") + b.WriteString(`{"name":"","input":{...}}` + "\n") + b.WriteString("\n") + b.WriteString("If you do NOT need a tool, just answer in plain text.\n") + return b.String() +} + +// renderMessageBlocks renders a single message's content blocks into a +// transcript snippet. Text blocks are sanitised; tool_use blocks render as +// `[tool_call name=... input=...]`; tool_result blocks render as +// `[tool_result for=... ok|error] ...`. 
+func renderMessageBlocks(role string, content types.AnthropicContent) string { var parts []string for _, block := range content { switch block.Type { case "text": - if block.Text != "" { - parts = append(parts, sanitize.Text(block.Text)) + if block.Text == "" { + continue + } + cleaned := systemReminderRe.ReplaceAllString(block.Text, "") + cleaned = sanitize.Text(cleaned) + cleaned = strings.TrimSpace(cleaned) + if cleaned != "" { + parts = append(parts, cleaned) } case "tool_use": - input := strings.TrimSpace(string(block.Input)) - if input == "" { - input = "{}" - } parts = append(parts, fmt.Sprintf( - "\n%s\n", - block.ID, block.Name, input, + "[tool_call name=%q input=%s]", + block.Name, compactJSON(block.Input), )) case "tool_result": - body := toolResultBody(block.Content) - errAttr := "" + status := "ok" if block.IsError { - errAttr = ` is_error="true"` + status = "error" + } + body := renderToolResultContent(block.Content) + if body == "" { + body = "(empty)" } parts = append(parts, fmt.Sprintf( - "\n%s\n", - block.ToolUseID, errAttr, body, + "[tool_result for=%s status=%s]\n%s", + block.ToolUseID, status, body, )) - case "image": - parts = append(parts, "[Image]") - case "document": - title := block.Title - if title == "" { - title = "Document" - } - parts = append(parts, "[Document: "+title+"]") + case "image", "document": + parts = append(parts, fmt.Sprintf("[%s attached]", block.Type)) } } return strings.Join(parts, "\n") } -// toolResultBody flattens the `content` field of a tool_result block, which -// can be either a plain string or an array of `{type, text}` content parts. -func toolResultBody(raw json.RawMessage) string { +// renderToolResultContent flattens a tool_result.content payload (which can +// be a string or an array of {type:"text",text:...} blocks) to plain text. 
+func renderToolResultContent(raw json.RawMessage) string { if len(raw) == 0 { return "" } - - var asString string - if err := json.Unmarshal(raw, &asString); err == nil { - return sanitize.Text(asString) + var s string + if err := json.Unmarshal(raw, &s); err == nil { + return strings.TrimSpace(s) } - - var parts []struct { + var blocks []struct { Type string `json:"type"` Text string `json:"text"` } - if err := json.Unmarshal(raw, &parts); err == nil { + if err := json.Unmarshal(raw, &blocks); err == nil { var out []string - for _, p := range parts { - if p.Type == "text" && p.Text != "" { - out = append(out, sanitize.Text(p.Text)) + for _, b := range blocks { + if b.Type == "text" && b.Text != "" { + out = append(out, b.Text) } } - return strings.Join(out, "\n") + return strings.TrimSpace(strings.Join(out, "\n")) } - - return string(raw) + return strings.TrimSpace(string(raw)) } -// toolsToSystemText renders a tools schema array into a system-prompt chunk -// describing each tool. The idea (from cursor-api-proxy) is that since the -// Cursor CLI does not expose native tool_call deltas over the proxy, we tell -// the model what tools exist so it can reference them in its text output. -// -// NOTE: This is a one-way passthrough. The proxy cannot turn the model's -// textual "I would call Write with {...}" back into structured tool_use -// blocks. Callers that need real tool-use routing (e.g. Claude Code's coding -// agent) should run tools client-side and feed tool_result back in. 
-func toolsToSystemText(tools []types.AnthropicTool) string { - if len(tools) == 0 { - return "" +func compactJSON(raw json.RawMessage) string { + if len(raw) == 0 { + return "{}" } - - var lines []string - lines = append(lines, - "Available tools (they belong to the caller, not to you; describe your", - "intended call in plain text and the caller will execute it):", - "", - ) - for _, t := range tools { - schema := strings.TrimSpace(string(t.InputSchema)) - if schema == "" { - schema = "{}" - } else { - var pretty any - if err := json.Unmarshal(t.InputSchema, &pretty); err == nil { - if out, err := json.MarshalIndent(pretty, "", " "); err == nil { - schema = string(out) - } - } - } - lines = append(lines, - "Function: "+t.Name, - "Description: "+sanitize.Text(t.Description), - "Parameters: "+schema, - "", - ) + var v interface{} + if err := json.Unmarshal(raw, &v); err != nil { + return string(raw) } - return strings.TrimRight(strings.Join(lines, "\n"), "\n") + out, err := json.Marshal(v) + if err != nil { + return string(raw) + } + return string(out) +} + +func singleLine(s string) string { + s = strings.ReplaceAll(s, "\r", " ") + s = strings.ReplaceAll(s, "\n", " ") + for strings.Contains(s, " ") { + s = strings.ReplaceAll(s, " ", " ") + } + return strings.TrimSpace(s) } diff --git a/internal/server/anthropic_handlers.go b/internal/server/anthropic_handlers.go index 576a0b1..7bbab48 100644 --- a/internal/server/anthropic_handlers.go +++ b/internal/server/anthropic_handlers.go @@ -2,8 +2,12 @@ package server import ( "context" + "crypto/rand" + "encoding/hex" "encoding/json" "fmt" + "io" + "log" "net/http" "strings" "time" @@ -13,16 +17,21 @@ import ( ) func (s *Server) handleAnthropicMessages(w http.ResponseWriter, r *http.Request) { + bodyBytes, readErr := io.ReadAll(r.Body) + if readErr != nil { + writeJSON(w, http.StatusBadRequest, types.NewErrorResponse("read body: "+readErr.Error(), "invalid_request_error", "")) + return + } + r.Body.Close() + var req 
types.AnthropicMessagesRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + if err := json.Unmarshal(bodyBytes, &req); err != nil { writeJSON(w, http.StatusBadRequest, types.NewErrorResponse("invalid request body: "+err.Error(), "invalid_request_error", "")) return } - defer r.Body.Close() if req.MaxTokens <= 0 { - writeJSON(w, http.StatusBadRequest, types.NewErrorResponse("max_tokens is required", "invalid_request_error", "")) - return + req.MaxTokens = 16384 } if len(req.Messages) == 0 { writeJSON(w, http.StatusBadRequest, types.NewErrorResponse("messages must not be empty", "invalid_request_error", "")) @@ -30,14 +39,33 @@ func (s *Server) handleAnthropicMessages(w http.ResponseWriter, r *http.Request) } model := req.Model - if model == "" { + if model == "" || model == "auto" { model = s.cfg.DefaultModel } cursorModel := converter.ResolveToCursorModel(model) sessionKey := ensureSessionHeader(w, r) + // Surface caller-side knobs in the log: which tool names the brain is + // about to see, and (if no explicit X-Cursor-Workspace header was set) + // any host directory the caller's prompt happens to mention. The + // detected directory is promoted onto the request header so the + // downstream bridge picks it up via the standard ctx override path. 
+ if len(req.Tools) > 0 { + toolNames := make([]string, 0, len(req.Tools)) + for _, t := range req.Tools { + toolNames = append(toolNames, t.Name) + } + log.Printf("[tools] caller has %d executors: %v", len(toolNames), toolNames) + } + if r.Header.Get(workspaceHeaderName) == "" { + if detected := detectAnthropicCwd(req); detected != "" { + log.Printf("[workspace] detected caller cwd from prompt: %s", detected) + r.Header.Set(workspaceHeaderName, detected) + } + } + msgID := fmt.Sprintf("msg_%d", time.Now().UnixNano()) - prompt := buildPromptFromAnthropicMessages(req) + prompt := buildPromptFromAnthropicMessages(req, s.cfg.SystemPrompt) if req.Stream { s.streamAnthropicMessages(w, r, prompt, cursorModel, model, msgID, sessionKey) @@ -50,8 +78,9 @@ func (s *Server) handleAnthropicMessages(w http.ResponseWriter, r *http.Request) func (s *Server) streamAnthropicMessages(w http.ResponseWriter, r *http.Request, prompt, cursorModel, displayModel, msgID, sessionKey string) { sse := NewSSEWriter(w) parser := converter.NewStreamParser(msgID) + tcParser := NewToolCallStreamParser() - ctx, cancel := context.WithTimeout(r.Context(), time.Duration(s.cfg.Timeout)*time.Second) + ctx, cancel := context.WithTimeout(requestContext(r), time.Duration(s.cfg.Timeout)*time.Second) defer cancel() go func() { <-r.Context().Done() @@ -70,17 +99,68 @@ func (s *Server) streamAnthropicMessages(w http.ResponseWriter, r *http.Request, "content": []interface{}{}, }, }) - writeAnthropicSSE(sse, map[string]interface{}{ - "type": "content_block_start", - "index": 0, - "content_block": map[string]interface{}{"type": "text", "text": ""}, - }) - var accumulated strings.Builder + st := &anthropicStreamState{ + sse: sse, + blockIndex: 0, + } + + emitText := func(text string) { + if text == "" { + return + } + st.ensureTextBlockOpen() + writeAnthropicSSE(sse, map[string]interface{}{ + "type": "content_block_delta", + "index": st.blockIndex, + "delta": map[string]interface{}{"type": "text_delta", "text": 
text}, + }) + st.outChars += len(text) + } + + emitToolCall := func(call ParsedToolCall) { + st.closeTextBlockIfOpen() + st.blockIndex++ + toolID := newToolUseID() + writeAnthropicSSE(sse, map[string]interface{}{ + "type": "content_block_start", + "index": st.blockIndex, + "content_block": map[string]interface{}{ + "type": "tool_use", + "id": toolID, + "name": call.Name, + "input": map[string]interface{}{}, + }, + }) + writeAnthropicSSE(sse, map[string]interface{}{ + "type": "content_block_delta", + "index": st.blockIndex, + "delta": map[string]interface{}{ + "type": "input_json_delta", + "partial_json": string(call.Input), + }, + }) + writeAnthropicSSE(sse, map[string]interface{}{ + "type": "content_block_stop", + "index": st.blockIndex, + }) + st.toolCallsEmitted++ + } + + feedDelta := func(content string) bool { + emit, calls, err := tcParser.Feed(content) + emitText(emit) + for _, c := range calls { + emitToolCall(c) + } + if err != nil { + log.Printf("[tool_call] parse error: %v", err) + } + return true + } for line := range outputChan { result := parser.Parse(line) - if result.Skip { continue } @@ -93,12 +173,7 @@ func (s *Server) streamAnthropicMessages(w http.ResponseWriter, r *http.Request, } if result.Chunk != nil && len(result.Chunk.Choices) > 0 { if c := result.Chunk.Choices[0].Delta.Content; c != nil { - accumulated.WriteString(*c) - writeAnthropicSSE(sse, map[string]interface{}{ - "type": "content_block_delta", - "index": 0, - "delta": map[string]interface{}{"type": "text_delta", "text": *c}, - }) + feedDelta(*c) continue } } @@ -113,12 +188,7 @@ func (s *Server) streamAnthropicMessages(w http.ResponseWriter, r *http.Request, if result.Chunk != nil && len(result.Chunk.Choices) > 0 { if c := result.Chunk.Choices[0].Delta.Content; c != nil { - accumulated.WriteString(*c) - writeAnthropicSSE(sse, map[string]interface{}{ - "type": "content_block_delta", - "index": 0, - "delta": map[string]interface{}{"type": "text_delta", "text": *c}, - }) + feedDelta(*c) 
} } @@ -127,15 +197,25 @@ func (s *Server) streamAnthropicMessages(w http.ResponseWriter, r *http.Request, } } - outTokens := maxInt(1, accumulated.Len()/4) + if leftover, err := tcParser.Flush(); leftover != "" { + emitText(leftover) + if err != nil { + log.Printf("[tool_call] flush warning: %v", err) + } + } + + st.closeTextBlockIfOpen() + + stopReason := "end_turn" + if st.toolCallsEmitted > 0 { + stopReason = "tool_use" + } + + outTokens := maxInt(1, st.outChars/4) - writeAnthropicSSE(sse, map[string]interface{}{ - "type": "content_block_stop", - "index": 0, - }) writeAnthropicSSE(sse, map[string]interface{}{ "type": "message_delta", - "delta": map[string]interface{}{"stop_reason": "end_turn", "stop_sequence": nil}, + "delta": map[string]interface{}{"stop_reason": stopReason, "stop_sequence": nil}, "usage": map[string]interface{}{"output_tokens": outTokens}, }) writeAnthropicSSE(sse, map[string]interface{}{ @@ -149,27 +229,50 @@ func (s *Server) streamAnthropicMessages(w http.ResponseWriter, r *http.Request, } func (s *Server) nonStreamAnthropicMessages(w http.ResponseWriter, r *http.Request, prompt, cursorModel, displayModel, msgID, sessionKey string) { - ctx, cancel := context.WithTimeout(r.Context(), time.Duration(s.cfg.Timeout)*time.Second) + ctx, cancel := context.WithTimeout(requestContext(r), time.Duration(s.cfg.Timeout)*time.Second) defer cancel() go func() { <-r.Context().Done() cancel() }() - content, err := s.br.ExecuteSync(ctx, prompt, cursorModel, sessionKey) + rawContent, err := s.br.ExecuteSync(ctx, prompt, cursorModel, sessionKey) if err != nil { writeJSON(w, http.StatusInternalServerError, types.NewErrorResponse(err.Error(), "api_error", "")) return } - usage := estimateUsage(prompt, content) + + cleanText, calls := ExtractAllToolCalls(rawContent) + usage := estimateUsage(prompt, rawContent) + + var content []types.AnthropicResponseBlock + if cleanText != "" { + content = append(content, types.AnthropicResponseBlock{Type: "text", Text: 
cleanText}) + } + for _, c := range calls { + content = append(content, types.AnthropicResponseBlock{ + Type: "tool_use", + ID: newToolUseID(), + Name: c.Name, + Input: c.Input, + }) + } + if len(content) == 0 { + content = append(content, types.AnthropicResponseBlock{Type: "text", Text: ""}) + } + + stopReason := "end_turn" + if len(calls) > 0 { + stopReason = "tool_use" + } resp := types.AnthropicMessagesResponse{ ID: msgID, Type: "message", Role: "assistant", - Content: []types.AnthropicTextBlock{{Type: "text", Text: content}}, + Content: content, Model: displayModel, - StopReason: "end_turn", + StopReason: stopReason, Usage: types.AnthropicUsage{ InputTokens: usage.PromptTokens, OutputTokens: usage.CompletionTokens, @@ -178,6 +281,49 @@ func (s *Server) nonStreamAnthropicMessages(w http.ResponseWriter, r *http.Reque writeJSON(w, http.StatusOK, resp) } +// anthropicStreamState tracks per-request streaming state: which content +// block index we are on, whether the current text block is open, output +// character count for usage estimation, and how many tool_use blocks were +// emitted so we can pick stop_reason. 
+type anthropicStreamState struct { + sse *SSEWriter + blockIndex int + textOpen bool + outChars int + toolCallsEmitted int +} + +func (st *anthropicStreamState) ensureTextBlockOpen() { + if st.textOpen { + return + } + writeAnthropicSSE(st.sse, map[string]interface{}{ + "type": "content_block_start", + "index": st.blockIndex, + "content_block": map[string]interface{}{"type": "text", "text": ""}, + }) + st.textOpen = true +} + +func (st *anthropicStreamState) closeTextBlockIfOpen() { + if !st.textOpen { + return + } + writeAnthropicSSE(st.sse, map[string]interface{}{ + "type": "content_block_stop", + "index": st.blockIndex, + }) + st.textOpen = false +} + +func newToolUseID() string { + var b [12]byte + if _, err := rand.Read(b[:]); err != nil { + return fmt.Sprintf("toolu_%d", time.Now().UnixNano()) + } + return "toolu_" + hex.EncodeToString(b[:]) +} + func writeAnthropicSSE(sse *SSEWriter, event interface{}) { data, err := json.Marshal(event) if err != nil { diff --git a/internal/server/cwd_extract.go b/internal/server/cwd_extract.go new file mode 100644 index 0000000..29b2743 --- /dev/null +++ b/internal/server/cwd_extract.go @@ -0,0 +1,102 @@ +package server + +import ( + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/daniel/cursor-adapter/internal/types" +) + +// cwdPatterns matches the most common ways callers (Claude Code, opencode, +// Cursor CLI itself, custom clients) advertise their host working +// directory inside the prompt. +// +// Patterns must capture an absolute path in group 1. +var cwdPatterns = []*regexp.Regexp{ + // Claude Code style: + // + // Working directory: /Users/x/proj + // Is directory a git repo: Yes + // ... + // + regexp.MustCompile(`(?si).*?working directory:\s*(\S+)`), + + // Generic ... wrapper. + regexp.MustCompile(`(?i)\s*([^<\s][^<]*?)\s*`), + + // "Working directory: /abs/path" on its own line. 
+ regexp.MustCompile(`(?im)^\s*working directory:\s*(/[^\s<>]+)\s*$`), + + // "Current working directory is /abs/path" / "current working directory: /abs/path" + regexp.MustCompile(`(?i)current working directory(?: is)?[:\s]+(/[^\s<>]+)`), + + // Loose "cwd: /abs/path" / "cwd=/abs/path". + regexp.MustCompile(`(?i)\bcwd\s*[:=]\s*(/[^\s<>]+)`), +} + +// detectCallerWorkspace returns the first absolute, host-resident directory +// it can extract from corpus. It rejects: +// - non-absolute paths (e.g. "src/") +// - paths that don't exist on the host (e.g. "/sessions/..." sandbox +// paths sent by Claude Desktop's Cowork VM) +// - paths that point to a file rather than a directory +// +// Returning "" simply means "no usable workspace hint found", and callers +// should fall back to config defaults. +func detectCallerWorkspace(corpus string) string { + for _, p := range cwdPatterns { + m := p.FindStringSubmatch(corpus) + if len(m) < 2 { + continue + } + cand := strings.TrimSpace(m[1]) + // Strip trailing punctuation that often follows a path in prose. + cand = strings.TrimRight(cand, `.,;:"'`+"`)>") + if cand == "" || !filepath.IsAbs(cand) { + continue + } + info, err := os.Stat(cand) + if err != nil || !info.IsDir() { + continue + } + return cand + } + return "" +} + +// detectAnthropicCwd scans an Anthropic Messages request for a workspace +// hint. It walks system blocks first (Claude Code / opencode usually put +// the block there), then user/assistant text blocks (some clients +// embed it as inside the first user message). 
+func detectAnthropicCwd(req types.AnthropicMessagesRequest) string { + var sb strings.Builder + for _, b := range req.System { + if b.Type == "text" && b.Text != "" { + sb.WriteString(b.Text) + sb.WriteByte('\n') + } + } + for _, m := range req.Messages { + for _, b := range m.Content { + if b.Type == "text" && b.Text != "" { + sb.WriteString(b.Text) + sb.WriteByte('\n') + } + } + } + return detectCallerWorkspace(sb.String()) +} + +// detectOpenAICwd scans an OpenAI-style chat completion request for a +// workspace hint, including system messages (which the brain prompt +// builder otherwise drops). +func detectOpenAICwd(req types.ChatCompletionRequest) string { + var sb strings.Builder + for _, m := range req.Messages { + sb.WriteString(string(m.Content)) + sb.WriteByte('\n') + } + return detectCallerWorkspace(sb.String()) +} diff --git a/internal/server/cwd_extract_test.go b/internal/server/cwd_extract_test.go new file mode 100644 index 0000000..b0d88b2 --- /dev/null +++ b/internal/server/cwd_extract_test.go @@ -0,0 +1,108 @@ +package server + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/daniel/cursor-adapter/internal/types" +) + +func TestDetectCallerWorkspace_ClaudeCodeEnvBlock(t *testing.T) { + dir := t.TempDir() + corpus := "\nWorking directory: " + dir + "\nIs directory a git repo: Yes\n" + got := detectCallerWorkspace(corpus) + if got != dir { + t.Fatalf("got %q, want %q", got, dir) + } +} + +func TestDetectCallerWorkspace_RejectsNonExistentSandboxPath(t *testing.T) { + corpus := "Working directory: /sessions/gracious-magical-franklin/proj" + got := detectCallerWorkspace(corpus) + if got != "" { + t.Fatalf("expected empty (path doesn't exist on host), got %q", got) + } +} + +func TestDetectCallerWorkspace_RejectsRelativePath(t *testing.T) { + corpus := "cwd: src/" + got := detectCallerWorkspace(corpus) + if got != "" { + t.Fatalf("expected empty for relative path, got %q", got) + } +} + +func 
TestDetectCallerWorkspace_RejectsFilePath(t *testing.T) { + dir := t.TempDir() + f := filepath.Join(dir, "file.txt") + if err := os.WriteFile(f, []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + corpus := "Working directory: " + f + got := detectCallerWorkspace(corpus) + if got != "" { + t.Fatalf("expected empty for file path, got %q", got) + } +} + +func TestDetectAnthropicCwd_FromSystemBlock(t *testing.T) { + dir := t.TempDir() + req := types.AnthropicMessagesRequest{ + System: []types.AnthropicBlock{ + {Type: "text", Text: "\nWorking directory: " + dir + "\n"}, + }, + Messages: []types.AnthropicMessage{ + {Role: "user", Content: []types.AnthropicBlock{{Type: "text", Text: "hi"}}}, + }, + } + if got := detectAnthropicCwd(req); got != dir { + t.Fatalf("got %q, want %q", got, dir) + } +} + +func TestDetectAnthropicCwd_FromUserMessage(t *testing.T) { + dir := t.TempDir() + req := types.AnthropicMessagesRequest{ + Messages: []types.AnthropicMessage{ + {Role: "user", Content: []types.AnthropicBlock{ + {Type: "text", Text: "Current working directory: " + dir + "\nHelp me"}, + }}, + }, + } + if got := detectAnthropicCwd(req); got != dir { + t.Fatalf("got %q, want %q", got, dir) + } +} + +func TestDetectAnthropicCwd_TrimsTrailingPunctuation(t *testing.T) { + dir := t.TempDir() + corpus := "Working directory: " + dir + "." + if got := detectCallerWorkspace(corpus); got != dir { + t.Fatalf("got %q, want %q (trailing dot should be stripped)", got, dir) + } +} + +func TestDetectAnthropicCwd_NoneFound(t *testing.T) { + req := types.AnthropicMessagesRequest{ + Messages: []types.AnthropicMessage{ + {Role: "user", Content: []types.AnthropicBlock{{Type: "text", Text: "just a question"}}}, + }, + } + if got := detectAnthropicCwd(req); got != "" { + t.Fatalf("got %q, want empty", got) + } +} + +// Sanity check that none of our regexes mis-eat absolute paths inside +// regular sentences without a cwd marker. 
+func TestDetectCallerWorkspace_IgnoresUnmarkedAbsolutePaths(t *testing.T) { + corpus := "I edited /tmp/foo earlier." + if !strings.HasPrefix(corpus, "I edited") { // keep the import used + t.Fatal("test fixture changed") + } + if got := detectCallerWorkspace(corpus); got != "" { + t.Fatalf("got %q, want empty (no cwd marker)", got) + } +} diff --git a/internal/server/handlers.go b/internal/server/handlers.go index a408438..7045dbf 100644 --- a/internal/server/handlers.go +++ b/internal/server/handlers.go @@ -72,9 +72,23 @@ func (s *Server) handleChatCompletions(w http.ResponseWriter, r *http.Request) { return } + // --- Pure brain: only our system prompt, drop the client's --- var parts []string + if s.cfg.SystemPrompt != "" { + parts = append(parts, "system: "+s.cfg.SystemPrompt) + } for _, m := range req.Messages { + // Drop client system messages (mode descriptions, tool schemas). + if m.Role == "system" { + continue + } text := sanitize.Text(string(m.Content)) + // Strip blocks embedded in messages. 
+ text = systemReminderRe.ReplaceAllString(text, "") + text = strings.TrimSpace(text) + if text == "" { + continue + } parts = append(parts, fmt.Sprintf("%s: %s", m.Role, text)) } prompt := strings.Join(parts, "\n") @@ -86,6 +100,13 @@ func (s *Server) handleChatCompletions(w http.ResponseWriter, r *http.Request) { cursorModel := converter.ResolveToCursorModel(model) sessionKey := ensureSessionHeader(w, r) + if r.Header.Get(workspaceHeaderName) == "" { + if detected := detectOpenAICwd(req); detected != "" { + slog.Debug("workspace detected from prompt", "path", detected) + r.Header.Set(workspaceHeaderName, detected) + } + } + chatID := fmt.Sprintf("chatcmpl-%d", time.Now().UnixNano()) created := time.Now().Unix() @@ -101,7 +122,7 @@ func (s *Server) streamChat(w http.ResponseWriter, r *http.Request, prompt, curs sse := NewSSEWriter(w) parser := converter.NewStreamParser(chatID) - ctx, cancel := context.WithTimeout(r.Context(), time.Duration(s.cfg.Timeout)*time.Second) + ctx, cancel := context.WithTimeout(requestContext(r), time.Duration(s.cfg.Timeout)*time.Second) defer cancel() go func() { <-r.Context().Done() @@ -194,7 +215,7 @@ func (s *Server) streamChat(w http.ResponseWriter, r *http.Request, prompt, curs } func (s *Server) nonStreamChat(w http.ResponseWriter, r *http.Request, prompt, cursorModel, displayModel, chatID string, created int64, sessionKey string) { - ctx, cancel := context.WithTimeout(r.Context(), time.Duration(s.cfg.Timeout)*time.Second) + ctx, cancel := context.WithTimeout(requestContext(r), time.Duration(s.cfg.Timeout)*time.Second) defer cancel() go func() { <-r.Context().Done() diff --git a/internal/server/messages_test.go b/internal/server/messages_test.go index a7114eb..6d46aeb 100644 --- a/internal/server/messages_test.go +++ b/internal/server/messages_test.go @@ -214,9 +214,11 @@ func TestChatCompletions_AcceptsArrayContentBlocks(t *testing.T) { if rec.Code != http.StatusOK { t.Fatalf("status = %d, want %d, body=%s", rec.Code, 
http.StatusOK, rec.Body.String()) } - if !strings.Contains(br.lastPrompt, "system: You are terse.") { - t.Fatalf("prompt = %q, want system text content", br.lastPrompt) + // Client system messages should be DROPPED (pure brain mode). + if strings.Contains(br.lastPrompt, "You are terse.") { + t.Fatalf("prompt should NOT contain client system message, got: %q", br.lastPrompt) } + // User text should still be present and concatenated. if !strings.Contains(br.lastPrompt, "user: hello world") { t.Fatalf("prompt = %q, want concatenated user text content", br.lastPrompt) } @@ -330,6 +332,133 @@ func TestAnthropicMessages_StreamingEmitsNoDuplicateFinalText(t *testing.T) { } } +func TestAnthropicMessages_PromptIncludesToolsAndToolHistory(t *testing.T) { + cfg := config.Defaults() + br := &mockBridge{executeSync: "ok"} + srv := New(&cfg, br) + + body := `{ + "model":"auto", + "max_tokens":128, + "tools":[{"name":"bash","description":"Run a shell command","input_schema":{"type":"object","properties":{"command":{"type":"string"}}}}], + "messages":[ + {"role":"user","content":[{"type":"text","text":"clean up my desktop"}]}, + {"role":"assistant","content":[{"type":"tool_use","id":"toolu_1","name":"bash","input":{"command":"ls ~/Desktop"}}]}, + {"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_1","content":"a.png\nb.txt"}]} + ], + "stream":false + }` + req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + srv.mux.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, body=%s", rec.Code, rec.Body.String()) + } + + prompt := br.lastPrompt + for _, want := range []string{ + "Available executors", + "- bash", + "Run a shell command", + "", + "clean up my desktop", + `[tool_call name="bash" input=`, + "[tool_result for=toolu_1 status=ok]", + "a.png", + } { + if !strings.Contains(prompt, want) { + t.Fatalf("prompt missing 
%q\nprompt:\n%s", want, prompt) + } + } +} + +func TestAnthropicMessages_NonStreamTranslatesToolCallToToolUse(t *testing.T) { + cfg := config.Defaults() + br := &mockBridge{ + executeSync: "I'll run it now.\n\n{\"name\":\"bash\",\"input\":{\"command\":\"mkdir -p ~/Desktop/screenshots\"}}\n", + } + srv := New(&cfg, br) + + req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(`{ + "model":"auto", + "max_tokens":128, + "tools":[{"name":"bash"}], + "messages":[{"role":"user","content":"organize desktop"}], + "stream":false + }`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + srv.mux.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, body=%s", rec.Code, rec.Body.String()) + } + + body := rec.Body.String() + for _, want := range []string{ + `"stop_reason":"tool_use"`, + `"type":"tool_use"`, + `"name":"bash"`, + `"command":"mkdir -p ~/Desktop/screenshots"`, + `"type":"text"`, + `I'll run it now.`, + } { + if !strings.Contains(body, want) { + t.Fatalf("response missing %q\nbody=%s", want, body) + } + } +} + +func TestAnthropicMessages_StreamTranslatesToolCallToToolUseSSE(t *testing.T) { + cfg := config.Defaults() + srv := New(&cfg, &mockBridge{ + executeLines: []string{ + `{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"running\n"}]}}`, + `{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"running\n\n"}]}}`, + `{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"running\n\n{\"name\":\"bash\",\"input\":{\"command\":\"ls\"}}\n"}]}}`, + `{"type":"result","subtype":"success","usage":{"inputTokens":3,"outputTokens":2}}`, + }, + }) + + req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(`{ + "model":"auto", + "max_tokens":128, + "tools":[{"name":"bash"}], + "messages":[{"role":"user","content":"go"}], + "stream":true + }`)) + req.Header.Set("Content-Type", 
"application/json") + rec := httptest.NewRecorder() + + srv.mux.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, body=%s", rec.Code, rec.Body.String()) + } + + body := rec.Body.String() + for _, want := range []string{ + `"type":"message_start"`, + `"type":"content_block_start"`, + `"type":"text"`, + `"text":"running`, + `"type":"tool_use"`, + `"name":"bash"`, + `"type":"input_json_delta"`, + `\"command\":\"ls\"`, + `"stop_reason":"tool_use"`, + `"type":"message_stop"`, + } { + if !strings.Contains(body, want) { + t.Fatalf("stream missing %q\nbody=%s", want, body) + } + } + if strings.Contains(body, "") { + t.Fatalf("stream leaked raw sentinel: %s", body) + } +} + func TestAnthropicMessages_GeneratesSessionHeaderWhenMissing(t *testing.T) { cfg := config.Defaults() br := &mockBridge{executeSync: "Hello"} diff --git a/internal/server/mount_hints.go b/internal/server/mount_hints.go new file mode 100644 index 0000000..07bc957 --- /dev/null +++ b/internal/server/mount_hints.go @@ -0,0 +1,60 @@ +package server + +import ( + "regexp" + "sort" + "strings" + + "github.com/daniel/cursor-adapter/internal/types" +) + +// Cowork-style mount path: /sessions/--/mnt/ +// (and any deeper subpath; we capture only the mount root). +var mountPathRe = regexp.MustCompile(`/sessions/[a-z][a-z0-9]*(?:-[a-z][a-z0-9]*)+/mnt/[^\s/'"]+`) + +// extractMountHints walks all prior tool_result blocks in the conversation +// and returns any Cowork-style /sessions//mnt/ mount roots +// they reveal, deduped & sorted. +// +// This is purely stateless — we re-derive the set from the request body +// every turn. No server-side cache to invalidate, and it survives proxy +// restarts because the caller (Claude Desktop) replays the full history +// on each request anyway. 
+func extractMountHints(req types.AnthropicMessagesRequest) []string { + seen := map[string]struct{}{} + for _, m := range req.Messages { + for _, b := range m.Content { + if b.Type != "tool_result" { + continue + } + for _, p := range mountPathRe.FindAllString(renderToolResultContent(b.Content), -1) { + seen[p] = struct{}{} + } + } + } + if len(seen) == 0 { + return nil + } + out := make([]string, 0, len(seen)) + for p := range seen { + out = append(out, p) + } + sort.Strings(out) + return out +} + +// renderMountHints turns a list of mount roots into a prompt section the +// brain can refer to. Returns "" when there are no hints. +func renderMountHints(hints []string) string { + if len(hints) == 0 { + return "" + } + var b strings.Builder + b.WriteString("Known host-mount paths (discovered earlier in this conversation, prefer these for any host file work):\n") + for _, h := range hints { + b.WriteString("- ") + b.WriteString(h) + b.WriteByte('\n') + } + return b.String() +} diff --git a/internal/server/server.go b/internal/server/server.go index ed1a791..8fd11f8 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -49,6 +49,14 @@ func (s *Server) buildRouter() *chi.Mux { r.Post("/v1/messages", s.handleAnthropicMessages) r.Get("/health", s.handleHealth) + // Claude Desktop sends HEAD / as a health check before making API calls. + // Return 200 so it doesn't error with "K.text.trim" before sending the real request. 
+ rootHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + r.Head("/", rootHandler) + r.Get("/", rootHandler) + return r } diff --git a/internal/server/session.go b/internal/server/session.go index 5bcec08..f42fc51 100644 --- a/internal/server/session.go +++ b/internal/server/session.go @@ -1,15 +1,31 @@ package server import ( + "context" "fmt" "net/http" + "path/filepath" "strings" "time" + + "github.com/daniel/cursor-adapter/internal/bridge" ) const sessionHeaderName = "X-Cursor-Session-ID" +const workspaceHeaderName = "X-Cursor-Workspace" const exposeHeadersName = "Access-Control-Expose-Headers" +// requestContext attaches per-request bridge knobs (currently: workspace +// override) read from headers onto ctx. +func requestContext(r *http.Request) context.Context { + ctx := r.Context() + ws := strings.TrimSpace(r.Header.Get(workspaceHeaderName)) + if ws != "" && filepath.IsAbs(ws) { + ctx = bridge.WithWorkspaceOverride(ctx, ws) + } + return ctx +} + func ensureSessionHeader(w http.ResponseWriter, r *http.Request) string { sessionKey := strings.TrimSpace(r.Header.Get(sessionHeaderName)) if sessionKey == "" { diff --git a/internal/server/toolcall.go b/internal/server/toolcall.go new file mode 100644 index 0000000..7fa4cf5 --- /dev/null +++ b/internal/server/toolcall.go @@ -0,0 +1,200 @@ +package server + +import ( + "encoding/json" + "fmt" + "strings" +) + +// Sentinels the brain is instructed to wrap tool calls with. We use XML-ish +// tags rather than markdown fences because they are unambiguous and easy to +// detect mid-stream without confusing them with normal code blocks. +const ( + toolCallOpen = "" + toolCallClose = "" +) + +// ParsedToolCall is a successfully extracted tool invocation request from +// the brain's text stream. 
type ParsedToolCall struct {
	// Name is the tool to invoke, taken from the payload's "name" field
	// (or "tool" when "name" is absent).
	Name string
	// Input is the raw JSON arguments for the tool, taken from the payload's
	// "input" field (or "arguments"); it is `{}` when neither is supplied.
	Input json.RawMessage
}

// ToolCallStreamParser is a small streaming state machine that splits an
// incoming text stream into:
//   - safe-to-emit plain text (everything outside the tool-call sentinels)
//   - one or more ParsedToolCall (everything between an opening and a
//     closing sentinel)
//
// It buffers just enough trailing bytes to avoid emitting half of an opening
// sentinel as text.
type ToolCallStreamParser struct {
	// buf holds input that has been fed but not yet emitted or parsed:
	// either a possible partial opening sentinel, or the body of a tool
	// call whose closing sentinel has not arrived yet.
	buf strings.Builder
	// inToolCall is true once an opening sentinel has been consumed and the
	// matching closing sentinel has not been seen.
	inToolCall bool
}

// NewToolCallStreamParser returns a fresh parser with an empty buffer,
// positioned outside any tool-call block.
func NewToolCallStreamParser() *ToolCallStreamParser {
	return &ToolCallStreamParser{}
}

// Feed appends s to the parser's buffer and returns:
//   - emitText: text safe to forward as text_delta to the caller now
//   - calls: tool calls fully extracted in this Feed
//   - err: a malformed tool_call block (invalid JSON inside sentinels)
//
// Feed never returns text that could be the prefix of an opening sentinel —
// such bytes stay buffered until the next Feed/Flush.
+func (p *ToolCallStreamParser) Feed(s string) (emitText string, calls []ParsedToolCall, err error) { + p.buf.WriteString(s) + var emitted strings.Builder + + for { + current := p.buf.String() + if p.inToolCall { + closeIdx := strings.Index(current, toolCallClose) + if closeIdx < 0 { + return emitted.String(), calls, nil + } + payload := current[:closeIdx] + call, perr := parseToolCallPayload(payload) + rest := current[closeIdx+len(toolCallClose):] + rest = strings.TrimPrefix(rest, "\r") + rest = strings.TrimPrefix(rest, "\n") + p.buf.Reset() + p.buf.WriteString(rest) + p.inToolCall = false + if perr != nil { + return emitted.String(), calls, perr + } + calls = append(calls, call) + continue + } + + openIdx := strings.Index(current, toolCallOpen) + if openIdx >= 0 { + emitted.WriteString(current[:openIdx]) + rest := current[openIdx+len(toolCallOpen):] + rest = strings.TrimPrefix(rest, "\r") + rest = strings.TrimPrefix(rest, "\n") + p.buf.Reset() + p.buf.WriteString(rest) + p.inToolCall = true + continue + } + + // No open sentinel yet. Emit everything except a potential prefix + // of `` lurking at the tail of the buffer. + hold := potentialSentinelSuffix(current, toolCallOpen) + if hold == 0 { + emitted.WriteString(current) + p.buf.Reset() + return emitted.String(), calls, nil + } + emitted.WriteString(current[:len(current)-hold]) + tail := current[len(current)-hold:] + p.buf.Reset() + p.buf.WriteString(tail) + return emitted.String(), calls, nil + } +} + +// Flush returns any remaining buffered text and resets the parser. If we +// ended mid-`` block (no closing sentinel), the partial content +// is returned as plain text — better the caller sees something than data +// loss. 
+func (p *ToolCallStreamParser) Flush() (string, error) { + leftover := p.buf.String() + p.buf.Reset() + if p.inToolCall { + p.inToolCall = false + return toolCallOpen + leftover, fmt.Errorf("unterminated %s block", toolCallOpen) + } + return leftover, nil +} + +// ExtractAllToolCalls is the non-streaming counterpart: scan the full text +// once, return cleaned text (with tool_call blocks removed) plus extracted +// calls. Any malformed block is preserved verbatim in the returned text. +func ExtractAllToolCalls(text string) (cleanText string, calls []ParsedToolCall) { + var out strings.Builder + rest := text + for { + i := strings.Index(rest, toolCallOpen) + if i < 0 { + out.WriteString(rest) + break + } + out.WriteString(rest[:i]) + after := rest[i+len(toolCallOpen):] + j := strings.Index(after, toolCallClose) + if j < 0 { + // Unterminated; keep the rest verbatim. + out.WriteString(toolCallOpen) + out.WriteString(after) + break + } + payload := after[:j] + if call, err := parseToolCallPayload(payload); err == nil { + calls = append(calls, call) + } else { + // Keep malformed block as-is so the user can see it. + out.WriteString(toolCallOpen) + out.WriteString(payload) + out.WriteString(toolCallClose) + } + rest = strings.TrimPrefix(after[j+len(toolCallClose):], "\n") + } + return strings.TrimSpace(out.String()), calls +} + +func parseToolCallPayload(payload string) (ParsedToolCall, error) { + trimmed := strings.TrimSpace(payload) + // Allow the brain to wrap the JSON in ```json fences too. 
+ trimmed = strings.TrimPrefix(trimmed, "```json") + trimmed = strings.TrimPrefix(trimmed, "```") + trimmed = strings.TrimSuffix(trimmed, "```") + trimmed = strings.TrimSpace(trimmed) + if trimmed == "" { + return ParsedToolCall{}, fmt.Errorf("empty tool_call body") + } + var raw struct { + Name string `json:"name"` + Tool string `json:"tool"` + Input json.RawMessage `json:"input"` + Args json.RawMessage `json:"arguments"` + } + if err := json.Unmarshal([]byte(trimmed), &raw); err != nil { + return ParsedToolCall{}, fmt.Errorf("invalid tool_call json: %w", err) + } + name := raw.Name + if name == "" { + name = raw.Tool + } + if name == "" { + return ParsedToolCall{}, fmt.Errorf("tool_call missing name") + } + input := raw.Input + if len(input) == 0 { + input = raw.Args + } + if len(input) == 0 { + input = json.RawMessage(`{}`) + } + return ParsedToolCall{Name: name, Input: input}, nil +} + +// potentialSentinelSuffix returns the length of the longest suffix of s +// that is a strict prefix of sentinel. 
+func potentialSentinelSuffix(s, sentinel string) int { + maxLen := len(sentinel) - 1 + if maxLen > len(s) { + maxLen = len(s) + } + for i := maxLen; i > 0; i-- { + if strings.HasPrefix(sentinel, s[len(s)-i:]) { + return i + } + } + return 0 +} diff --git a/internal/server/toolcall_test.go b/internal/server/toolcall_test.go new file mode 100644 index 0000000..cfe6b9b --- /dev/null +++ b/internal/server/toolcall_test.go @@ -0,0 +1,98 @@ +package server + +import ( + "strings" + "testing" +) + +func TestToolCallStreamParser_PlainTextPassThrough(t *testing.T) { + p := NewToolCallStreamParser() + emit, calls, err := p.Feed("hello world\n") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(calls) != 0 { + t.Fatalf("expected no calls, got %+v", calls) + } + if emit != "hello world\n" { + t.Fatalf("emit = %q, want passthrough", emit) + } + rest, err := p.Flush() + if err != nil { + t.Fatalf("flush error: %v", err) + } + if rest != "" { + t.Fatalf("flush leftover = %q, want empty", rest) + } +} + +func TestToolCallStreamParser_ExtractsCompleteCall(t *testing.T) { + p := NewToolCallStreamParser() + in := "before\n\n{\"name\":\"bash\",\"input\":{\"command\":\"ls\"}}\n\nafter" + emit, calls, err := p.Feed(in) + if err != nil { + t.Fatalf("error: %v", err) + } + if len(calls) != 1 { + t.Fatalf("expected 1 call, got %d", len(calls)) + } + if calls[0].Name != "bash" { + t.Fatalf("name = %q", calls[0].Name) + } + if !strings.Contains(string(calls[0].Input), `"command":"ls"`) { + t.Fatalf("input = %s", calls[0].Input) + } + if !strings.Contains(emit, "before") || !strings.Contains(emit, "after") { + t.Fatalf("emit lost surrounding text: %q", emit) + } +} + +func TestToolCallStreamParser_HoldsPartialOpenSentinel(t *testing.T) { + p := NewToolCallStreamParser() + // Feed a chunk ending with a partial "{\"name\":\"x\"}") + if err != nil { + t.Fatalf("error 2: %v", err) + } + if emit2 != "" { + t.Fatalf("emit2 = %q, want empty (only call extracted)", emit2) + } + 
if len(calls2) != 1 || calls2[0].Name != "x" { + t.Fatalf("calls2 = %+v", calls2) + } +} + +func TestToolCallStreamParser_RejectsInvalidJSON(t *testing.T) { + p := NewToolCallStreamParser() + _, _, err := p.Feed("not json") + if err == nil { + t.Fatal("expected parse error for invalid JSON inside sentinels") + } +} + +func TestExtractAllToolCalls_MultipleAndCleanText(t *testing.T) { + in := "preamble\n{\"name\":\"a\",\"input\":{}}\nmiddle\n{\"tool\":\"b\",\"arguments\":{\"x\":1}}\nend" + clean, calls := ExtractAllToolCalls(in) + if len(calls) != 2 { + t.Fatalf("calls = %d", len(calls)) + } + if calls[0].Name != "a" || calls[1].Name != "b" { + t.Fatalf("names = %q, %q", calls[0].Name, calls[1].Name) + } + if !strings.Contains(clean, "preamble") || !strings.Contains(clean, "middle") || !strings.Contains(clean, "end") { + t.Fatalf("clean text wrong: %q", clean) + } + if strings.Contains(clean, "") { + t.Fatalf("clean text still contains sentinels: %q", clean) + } +} diff --git a/internal/types/anthropic.go b/internal/types/anthropic.go index d8d8b5f..fe2f265 100644 --- a/internal/types/anthropic.go +++ b/internal/types/anthropic.go @@ -27,13 +27,29 @@ type AnthropicBlock struct { Title string `json:"title,omitempty"` } -// AnthropicTextBlock kept for response serialisation (proxy always returns -// text blocks back to the client; it does not emit tool_use natively). +// AnthropicTextBlock kept for response serialisation of plain text content. +// Deprecated: use AnthropicResponseBlock for outputs that may also carry +// tool_use blocks. type AnthropicTextBlock struct { Type string `json:"type"` Text string `json:"text,omitempty"` } +// AnthropicResponseBlock is a polymorphic content block emitted by the +// proxy. It can be a "text" block or a synthetic "tool_use" block produced +// by translating a brain-side ... sentinel. 
+type AnthropicResponseBlock struct { + Type string `json:"type"` + + // type=text + Text string `json:"text,omitempty"` + + // type=tool_use + ID string `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Input json.RawMessage `json:"input,omitempty"` +} + // AnthropicContent is a flexible field: it can be a plain string OR an array // of blocks. Claude Code always sends the array form. type AnthropicContent []AnthropicBlock @@ -83,13 +99,13 @@ type AnthropicMessagesRequest struct { } type AnthropicMessagesResponse struct { - ID string `json:"id"` - Type string `json:"type"` - Role string `json:"role"` - Content []AnthropicTextBlock `json:"content"` - Model string `json:"model"` - StopReason string `json:"stop_reason"` - Usage AnthropicUsage `json:"usage"` + ID string `json:"id"` + Type string `json:"type"` + Role string `json:"role"` + Content []AnthropicResponseBlock `json:"content"` + Model string `json:"model"` + StopReason string `json:"stop_reason"` + Usage AnthropicUsage `json:"usage"` } type AnthropicUsage struct { diff --git a/main.go b/main.go index 47ec933..56fbb93 100644 --- a/main.go +++ b/main.go @@ -68,14 +68,16 @@ func run(cmd *cobra.Command, args []string) error { cfg.ChatOnlyWorkspace = chatOnlyFlag } - br := bridge.NewBridge( - cfg.CursorCLIPath, - logger, - cfg.UseACP, - cfg.ChatOnlyWorkspace, - cfg.MaxConcurrent, - time.Duration(cfg.Timeout)*time.Second, - ) + br := bridge.NewBridge(bridge.Options{ + CursorPath: cfg.CursorCLIPath, + Logger: logger, + UseACP: cfg.UseACP, + ChatOnly: cfg.ChatOnlyWorkspace, + MaxConcurrent: cfg.MaxConcurrent, + Timeout: time.Duration(cfg.Timeout) * time.Second, + Mode: cfg.CursorMode, + WorkspaceRoot: cfg.WorkspaceRoot, + }) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -91,7 +93,9 @@ func run(cmd *cobra.Command, args []string) error { } logger.Info("Starting cursor-adapter", "port", cfg.Port, - "mode", mode, + "transport", mode, + "cursor_mode", 
cfg.CursorMode, + "workspace_root", cfg.WorkspaceRoot, "chat_only_workspace", cfg.ChatOnlyWorkspace, ) return srv.Run()