From 270accfd753a075c125e80facf4f74b0612d9916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=80=A7=E9=A9=8A?= Date: Fri, 3 Apr 2026 17:35:23 +0800 Subject: [PATCH] refactor(task-4): migrate providers to pkg/provider - Migrate cursor provider - Migrate geminiweb provider (playwright-based) - Update import paths to use pkg/domain/entity --- pkg/provider/cursor/provider.go | 27 + pkg/provider/geminiweb/browser.go | 125 ++++ pkg/provider/geminiweb/browser_manager.go | 173 +++++ pkg/provider/geminiweb/page.go | 250 +++++++ pkg/provider/geminiweb/playwright_provider.go | 641 ++++++++++++++++++ pkg/provider/geminiweb/pool.go | 169 +++++ pkg/provider/geminiweb/provider.go | 196 ++++++ 7 files changed, 1581 insertions(+) create mode 100644 pkg/provider/cursor/provider.go create mode 100644 pkg/provider/geminiweb/browser.go create mode 100644 pkg/provider/geminiweb/browser_manager.go create mode 100644 pkg/provider/geminiweb/page.go create mode 100644 pkg/provider/geminiweb/playwright_provider.go create mode 100644 pkg/provider/geminiweb/pool.go create mode 100644 pkg/provider/geminiweb/provider.go diff --git a/pkg/provider/cursor/provider.go b/pkg/provider/cursor/provider.go new file mode 100644 index 0000000..2d15241 --- /dev/null +++ b/pkg/provider/cursor/provider.go @@ -0,0 +1,27 @@ +package cursor + +import ( + "context" + "cursor-api-proxy/pkg/domain/entity" + "cursor-api-proxy/internal/config" +) + +type Provider struct { + cfg config.BridgeConfig +} + +func NewProvider(cfg config.BridgeConfig) *Provider { + return &Provider{cfg: cfg} +} + +func (p *Provider) Name() string { + return "cursor" +} + +func (p *Provider) Close() error { + return nil +} + +func (p *Provider) Generate(ctx context.Context, model string, messages []entity.Message, tools []entity.Tool, cb func(entity.StreamChunk)) error { + return nil +} diff --git a/pkg/provider/geminiweb/browser.go b/pkg/provider/geminiweb/browser.go new file mode 100644 index 0000000..a94894e --- /dev/null +++ 
b/pkg/provider/geminiweb/browser.go @@ -0,0 +1,125 @@ +package geminiweb + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/launcher" + "github.com/go-rod/rod/lib/proto" +) + +type Browser struct { + browser *rod.Browser + visible bool +} + +func NewBrowser(visible bool) (*Browser, error) { + l := launcher.New() + if visible { + l = l.Headless(false) + } else { + l = l.Headless(true) + } + + url, err := l.Launch() + if err != nil { + return nil, fmt.Errorf("failed to launch browser: %w", err) + } + + b := rod.New().ControlURL(url) + if err := b.Connect(); err != nil { + return nil, fmt.Errorf("failed to connect browser: %w", err) + } + + return &Browser{browser: b, visible: visible}, nil +} + +func (b *Browser) Close() error { + if b.browser != nil { + return b.browser.Close() + } + return nil +} + +func (b *Browser) NewPage() (*rod.Page, error) { + return b.browser.Page(proto.TargetCreateTarget{URL: "about:blank"}) +} + +type Cookie struct { + Name string `json:"name"` + Value string `json:"value"` + Domain string `json:"domain"` + Path string `json:"path"` + Expires float64 `json:"expires"` + HTTPOnly bool `json:"httpOnly"` + Secure bool `json:"secure"` +} + +func LoadCookiesFromFile(cookieFile string) ([]Cookie, error) { + data, err := os.ReadFile(cookieFile) + if err != nil { + return nil, fmt.Errorf("failed to read cookies: %w", err) + } + + var cookies []Cookie + if err := json.Unmarshal(data, &cookies); err != nil { + return nil, fmt.Errorf("failed to parse cookies: %w", err) + } + + return cookies, nil +} + +func SaveCookiesToFile(cookies []Cookie, cookieFile string) error { + data, err := json.MarshalIndent(cookies, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal cookies: %w", err) + } + + dir := filepath.Dir(cookieFile) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create cookie dir: %w", err) + } + + if err := 
os.WriteFile(cookieFile, data, 0644); err != nil { + return fmt.Errorf("failed to write cookies: %w", err) + } + + return nil +} + +func SetCookiesOnPage(page *rod.Page, cookies []Cookie) error { + var protoCookies []*proto.NetworkCookieParam + for _, c := range cookies { + p := &proto.NetworkCookieParam{ + Name: c.Name, + Value: c.Value, + Domain: c.Domain, + Path: c.Path, + HTTPOnly: c.HTTPOnly, + Secure: c.Secure, + } + if c.Expires > 0 { + exp := proto.TimeSinceEpoch(c.Expires) + p.Expires = exp + } + protoCookies = append(protoCookies, p) + } + return page.SetCookies(protoCookies) +} + +func WaitForElement(page *rod.Page, selector string, timeout time.Duration) (*rod.Element, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + return page.Context(ctx).Element(selector) +} + +func WaitForElements(page *rod.Page, selector string, timeout time.Duration) (rod.Elements, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + return page.Context(ctx).Elements(selector) +} diff --git a/pkg/provider/geminiweb/browser_manager.go b/pkg/provider/geminiweb/browser_manager.go new file mode 100644 index 0000000..3cff6f7 --- /dev/null +++ b/pkg/provider/geminiweb/browser_manager.go @@ -0,0 +1,173 @@ +package geminiweb + +import ( + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/launcher" + "github.com/go-rod/rod/lib/proto" +) + +// BrowserManager 管理瀏覽器實例的生命週期 +type BrowserManager struct { + mu sync.Mutex + browser *rod.Browser + userDataDir string + page *rod.Page + visible bool + isRunning bool + currentModel string +} + +var ( + globalManager *BrowserManager + globalMu sync.Mutex +) + +// GetBrowserManager 獲取全域瀏覽器管理器(單例) +func GetBrowserManager(userDataDir string, visible bool) (*BrowserManager, error) { + globalMu.Lock() + defer globalMu.Unlock() + + if globalManager != nil { + return globalManager, nil + } + + manager, err := 
NewBrowserManager(userDataDir, visible) + if err != nil { + return nil, err + } + + globalManager = manager + return globalManager, nil +} + +// NewBrowserManager 建立新的瀏覽器管理器 +func NewBrowserManager(userDataDir string, visible bool) (*BrowserManager, error) { + cleanLockFiles(userDataDir) + + if err := os.MkdirAll(userDataDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create user data dir: %w", err) + } + + return &BrowserManager{ + userDataDir: userDataDir, + visible: visible, + }, nil +} + +// cleanLockFiles 清理 Chrome 的殘留鎖檔案 +func cleanLockFiles(userDataDir string) { + lockFiles := []string{ + "SingletonLock", + "SingletonCookie", + "SingletonSocket", + "Default/SingletonLock", + "Default/SingletonCookie", + "Default/SingletonSocket", + } + + for _, file := range lockFiles { + path := filepath.Join(userDataDir, file) + os.Remove(path) + } +} + +// Launch 啟動瀏覽器(如果尚未啟動) +func (m *BrowserManager) Launch() error { + m.mu.Lock() + defer m.mu.Unlock() + + if m.isRunning && m.browser != nil { + return nil + } + + l := launcher.New() + + if m.visible { + l = l.Headless(false) + } else { + l = l.Headless(true) + } + + l = l.UserDataDir(m.userDataDir) + + url, err := l.Launch() + if err != nil { + return fmt.Errorf("failed to launch browser: %w", err) + } + + b := rod.New().ControlURL(url) + if err := b.Connect(); err != nil { + return fmt.Errorf("failed to connect browser: %w", err) + } + + m.browser = b + + page, err := b.Page(proto.TargetCreateTarget{URL: "about:blank"}) + if err != nil { + _ = b.Close() + return fmt.Errorf("failed to create page: %w", err) + } + + m.page = page + m.isRunning = true + + return nil +} + +// GetPage 獲取頁面 +func (m *BrowserManager) GetPage() (*rod.Page, error) { + m.mu.Lock() + defer m.mu.Unlock() + + if !m.isRunning || m.browser == nil { + return nil, fmt.Errorf("browser not running") + } + + return m.page, nil +} + +// Close 關閉瀏覽器 +func (m *BrowserManager) Close() error { + m.mu.Lock() + defer m.mu.Unlock() + + if 
!m.isRunning { + return nil + } + + var err error + if m.browser != nil { + err = m.browser.Close() + m.browser = nil + } + + m.page = nil + m.isRunning = false + return err +} + +// IsRunning 檢查瀏覽器是否正在運行 +func (m *BrowserManager) IsRunning() bool { + m.mu.Lock() + defer m.mu.Unlock() + return m.isRunning +} + +// SetCurrentModel 設定當前模型 +func (m *BrowserManager) SetCurrentModel(model string) { + m.mu.Lock() + defer m.mu.Unlock() + m.currentModel = model +} + +// GetCurrentModel 獲取當前模型 +func (m *BrowserManager) GetCurrentModel() string { + m.mu.Lock() + defer m.mu.Unlock() + return m.currentModel +} diff --git a/pkg/provider/geminiweb/page.go b/pkg/provider/geminiweb/page.go new file mode 100644 index 0000000..7365bd3 --- /dev/null +++ b/pkg/provider/geminiweb/page.go @@ -0,0 +1,250 @@ +package geminiweb + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/go-rod/rod" +) + +const geminiURL = "https://gemini.google.com/app" + +// 輸入框選擇器(依優先順序) +var inputSelectors = []string{ + ".ProseMirror", + "rich-textarea", + "div[role='textbox'][contenteditable='true']", + "div[contenteditable='true']", + "textarea", +} + +// NavigateToGemini 導航到 Gemini +func NavigateToGemini(page *rod.Page) error { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + if err := page.Context(ctx).Navigate(geminiURL); err != nil { + return fmt.Errorf("failed to navigate: %w", err) + } + return page.Context(ctx).WaitLoad() +} + +// IsLoggedIn 檢查是否已登入 +func IsLoggedIn(page *rod.Page) bool { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + for _, sel := range inputSelectors { + if _, err := page.Context(ctx).Element(sel); err == nil { + return true + } + } + return false +} + +// SelectModel 選擇模型(可選) +func SelectModel(page *rod.Page, model string) error { + fmt.Printf("[GeminiWeb] Model selection skipped (using current model)\n") + return nil +} + +// TypeInput 在輸入框中輸入文字 +func TypeInput(page 
*rod.Page, text string) error { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + fmt.Println("[GeminiWeb] Looking for input field...") + + // 1. 嘗試所有選擇器 + var inputEl *rod.Element + var err error + + for _, sel := range inputSelectors { + fmt.Printf(" Trying: %s\n", sel) + inputEl, err = page.Context(ctx).Element(sel) + if err == nil { + fmt.Printf(" ✓ Found with: %s\n", sel) + break + } + } + + if err != nil { + // 2. Fallback: 嘗試等待頁面載入完成後重試 + fmt.Println("[GeminiWeb] Waiting for page to fully load...") + time.Sleep(3 * time.Second) + + for _, sel := range inputSelectors { + fmt.Printf(" Retrying: %s\n", sel) + inputEl, err = page.Context(ctx).Element(sel) + if err == nil { + fmt.Printf(" ✓ Found with: %s\n", sel) + break + } + } + } + + if err != nil { + // 3. Debug: 印出頁面標題和 URL + info, _ := page.Info() + fmt.Printf("[GeminiWeb] DEBUG: URL=%s Title=%s\n", info.URL, info.Title) + + // 4. Fallback: 嘗試更通用的選擇器 + fmt.Println("[GeminiWeb] Trying generic selectors...") + genericSelectors := []string{ + "div[contenteditable]", + "[contenteditable]", + "textarea", + "input[type='text']", + } + + for _, sel := range genericSelectors { + fmt.Printf(" Trying generic: %s\n", sel) + inputEl, err = page.Context(ctx).Element(sel) + if err == nil { + fmt.Printf(" ✓ Found with: %s\n", sel) + break + } + } + } + + if err != nil { + info, _ := page.Info() + return fmt.Errorf("input field not found after trying all selectors (URL=%s)", info.URL) + } + + // 2. Focus 輸入框 + fmt.Printf("[GeminiWeb] Focusing input field...\n") + if err := inputEl.Focus(); err != nil { + return fmt.Errorf("failed to focus input: %w", err) + } + + time.Sleep(500 * time.Millisecond) + + // 3. 
使用 Input 方法 + fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text)) + if err := inputEl.Input(text); err != nil { + return fmt.Errorf("failed to input text: %w", err) + } + + time.Sleep(200 * time.Millisecond) + + fmt.Println("[GeminiWeb] Input complete") + return nil +} + +// ClickSend 發送訊息 +func ClickSend(page *rod.Page) error { + // 方法 1: 按 Enter + if err := page.Keyboard.Press('\r'); err != nil { + return fmt.Errorf("failed to press Enter: %w", err) + } + + time.Sleep(200 * time.Millisecond) + return nil +} + +// WaitForReady 等待頁面空閒 +func WaitForReady(page *rod.Page) error { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + fmt.Println("[GeminiWeb] Checking if page is ready...") + + for { + select { + case <-ctx.Done(): + fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway") + return nil + default: + time.Sleep(500 * time.Millisecond) + + // 檢查是否有停止按鈕 + hasStopBtn := false + stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']") + for _, btn := range stopBtns { + visible, _ := btn.Visible() + if visible { + hasStopBtn = true + break + } + } + + if !hasStopBtn { + fmt.Println("[GeminiWeb] Page is ready") + return nil + } + } + } +} + +// ExtractResponse 提取回應文字 +func ExtractResponse(page *rod.Page) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + var lastText string + lastUpdate := time.Now() + + for { + select { + case <-ctx.Done(): + if lastText != "" { + return lastText, nil + } + return "", fmt.Errorf("response timeout") + default: + time.Sleep(500 * time.Millisecond) + + // 尋找回應文字 + for _, sel := range responseSelectors { + elements, err := page.Elements(sel) + if err != nil || len(elements) == 0 { + continue + } + + // 取得最後一個元素的文字 + lastEl := elements[len(elements)-1] + text, err := lastEl.Text() + if err != nil { + continue + } + + text = strings.TrimSpace(text) + if text != "" && text != 
lastText && len(text) > len(lastText) { + lastText = text + lastUpdate = time.Now() + fmt.Printf("[GeminiWeb] Response length: %d\n", len(text)) + } + } + + // 檢查是否已完成(2 秒內沒有新內容) + if time.Since(lastUpdate) > 2*time.Second && lastText != "" { + // 最後檢查一次是否還有停止按鈕 + hasStopBtn := false + stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']") + for _, btn := range stopBtns { + visible, _ := btn.Visible() + if visible { + hasStopBtn = true + break + } + } + + if !hasStopBtn { + return lastText, nil + } + } + } + } +} + +// 默認的回應選擇器 +var responseSelectors = []string{ + ".model-response-text", + ".message-content", + ".markdown", + ".prose", + "model-response", +} diff --git a/pkg/provider/geminiweb/playwright_provider.go b/pkg/provider/geminiweb/playwright_provider.go new file mode 100644 index 0000000..dd63c59 --- /dev/null +++ b/pkg/provider/geminiweb/playwright_provider.go @@ -0,0 +1,641 @@ +package geminiweb + +import ( + "context" + "cursor-api-proxy/pkg/domain/entity" + "cursor-api-proxy/internal/config" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/playwright-community/playwright-go" +) + +// PlaywrightProvider 使用 Playwright 的 Gemini Provider +type PlaywrightProvider struct { + cfg config.BridgeConfig + pw *playwright.Playwright + browser playwright.Browser + context playwright.BrowserContext + page playwright.Page + mu sync.Mutex + userDataDir string +} + +var ( + playwrightInstance *playwright.Playwright + playwrightOnce sync.Once + playwrightErr error +) + +// NewPlaywrightProvider 建立新的 Playwright Provider +func NewPlaywrightProvider(cfg config.BridgeConfig) (*PlaywrightProvider, error) { + // 確保 Playwright 已初始化(單例) + playwrightOnce.Do(func() { + playwrightInstance, playwrightErr = playwright.Run() + if playwrightErr != nil { + playwrightErr = fmt.Errorf("failed to run playwright: %w", playwrightErr) + } + }) + + if playwrightErr != nil { + return nil, playwrightErr + } + + // 清理 Chrome 鎖檔案 + 
userDataDir := filepath.Join(cfg.GeminiAccountDir, "default-session") + cleanLockFiles(userDataDir) + + // 確保目錄存在 + if err := os.MkdirAll(userDataDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create user data dir: %w", err) + } + + return &PlaywrightProvider{ + cfg: cfg, + pw: playwrightInstance, + userDataDir: userDataDir, + }, nil +} + +// getName 返回 Provider 名稱 +func (p *PlaywrightProvider) Name() string { + return "gemini-web" +} + +// launchIfNeeded 如果需要則啟動瀏覽器 +func (p *PlaywrightProvider) launchIfNeeded() error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.context != nil && p.page != nil { + return nil + } + + fmt.Println("[GeminiWeb] Launching Chromium...") + + // 使用 LaunchPersistentContext(自動保存 session) + context, err := p.pw.Chromium.LaunchPersistentContext(p.userDataDir, + playwright.BrowserTypeLaunchPersistentContextOptions{ + Headless: playwright.Bool(!p.cfg.GeminiBrowserVisible), + Args: []string{ + "--no-first-run", + "--no-default-browser-check", + "--disable-background-networking", + "--disable-extensions", + "--disable-plugins", + "--disable-sync", + }, + }) + if err != nil { + return fmt.Errorf("failed to launch persistent context: %w", err) + } + + p.context = context + + // 取得或建立頁面 + pages := context.Pages() + if len(pages) > 0 { + p.page = pages[0] + } else { + page, err := context.NewPage() + if err != nil { + _ = context.Close() + return fmt.Errorf("failed to create page: %w", err) + } + p.page = page + } + + fmt.Println("[GeminiWeb] Browser launched") + return nil +} + +// Generate 生成回應 +func (p *PlaywrightProvider) Generate(ctx context.Context, model string, messages []entity.Message, tools []entity.Tool, cb func(entity.StreamChunk)) (err error) { + // 確保在返回錯誤時保存診斷 + defer func() { + if err != nil { + fmt.Println("[GeminiWeb] Error occurred, saving diagnostics...") + _ = p.saveDiagnostics() + } + }() + + fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model) + + // 1. 
確保瀏覽器已啟動 + if err := p.launchIfNeeded(); err != nil { + return fmt.Errorf("failed to launch browser: %w", err) + } + + // 2. 導航到 Gemini(如果需要) + currentURL := p.page.URL() + if !strings.Contains(currentURL, "gemini.google.com") { + fmt.Println("[GeminiWeb] Navigating to Gemini...") + if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{ + WaitUntil: playwright.WaitUntilStateDomcontentloaded, + Timeout: playwright.Float(60000), + }); err != nil { + return fmt.Errorf("failed to navigate: %w", err) + } + // 額外等待 JavaScript 載入 + fmt.Println("[GeminiWeb] Waiting for page to initialize...") + time.Sleep(3 * time.Second) + } + + // 3. 調試模式:等待用戶確認 + if p.cfg.GeminiBrowserVisible { + fmt.Println("\n" + strings.Repeat("=", 70)) + fmt.Println("🔍 調試模式:瀏覽器已開啟") + fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...") + fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*") + fmt.Println(strings.Repeat("=", 70)) + + var input string + fmt.Scanln(&input) + } + + // 4. 等待頁面完全載入(project-golem 策略) + fmt.Println("[GeminiWeb] Waiting for page to be ready...") + if err := p.waitForPageReady(); err != nil { + fmt.Printf("[GeminiWeb] Warning: %v\n", err) + + // 額外調試:輸出頁面 HTML 結構 + if p.cfg.GeminiBrowserVisible { + html, _ := p.page.Content() + debugPath := "/tmp/gemini-debug.html" + if err := os.WriteFile(debugPath, []byte(html), 0644); err == nil { + fmt.Printf("[GeminiWeb] HTML saved to: %s\n", debugPath) + } + } + } + + // 4. 檢查登入狀態 + fmt.Println("[GeminiWeb] Checking login status...") + loggedIn := p.isLoggedIn() + if !loggedIn { + fmt.Println("[GeminiWeb] Not logged in, continuing anyway") + if p.cfg.GeminiBrowserVisible { + fmt.Println("\n========================================") + fmt.Println("Browser is open. You can:") + fmt.Println("1. Log in to Gemini now") + fmt.Println("2. Continue without login") + fmt.Println("========================================\n") + } + } else { + fmt.Println("[GeminiWeb] ✓ Logged in") + } + + // 5. 
選擇模型(如果支援) + if err := p.selectModel(model); err != nil { + fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err) + } + + // 6. 建構提示詞 + prompt := buildPromptFromMessagesPlaywright(messages) + fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt)) + + // 7. 輸入文字(使用 Playwright 的 Auto-wait) + if err := p.typeInput(prompt); err != nil { + return fmt.Errorf("failed to type: %w", err) + } + + // 7. 發送訊息 + fmt.Println("[GeminiWeb] Sending message...") + if err := p.sendMessage(); err != nil { + return fmt.Errorf("failed to send: %w", err) + } + + // 8. 提取回應 + fmt.Println("[GeminiWeb] Waiting for response...") + response, err := p.extractResponse() + if err != nil { + return fmt.Errorf("failed to extract response: %w", err) + } + + // 9. 回調 + cb(entity.StreamChunk{Type: entity.ChunkText, Text: response}) + cb(entity.StreamChunk{Type: entity.ChunkDone, Done: true}) + + fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response)) + return nil +} + +// Close 關閉 Provider +func (p *PlaywrightProvider) Close() error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.context != nil { + if err := p.context.Close(); err != nil { + return err + } + p.context = nil + p.page = nil + } + return nil +} + +// saveDiagnostics 保存診斷信息 +func (p *PlaywrightProvider) saveDiagnostics() error { + if p.page == nil { + return fmt.Errorf("no page available") + } + + // 截圖 + screenshotPath := "/tmp/gemini-debug.png" + if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{ + Path: playwright.String(screenshotPath), + }); err == nil { + fmt.Printf("[GeminiWeb] Screenshot saved: %s\n", screenshotPath) + } + + // HTML + htmlPath := "/tmp/gemini-debug.html" + if html, err := p.page.Content(); err == nil { + if err := os.WriteFile(htmlPath, []byte(html), 0644); err == nil { + fmt.Printf("[GeminiWeb] HTML saved: %s\n", htmlPath) + } + } + + // 輸出頁面信息 + url := p.page.URL() + title, _ := p.page.Title() + fmt.Printf("[GeminiWeb] Diagnostics: URL=%s, Title=%s\n", 
url, title) + + return nil +} + +// waitForPageReady 等待頁面完全就緒(project-golem 策略) +func (p *PlaywrightProvider) waitForPageReady() error { + fmt.Println("[GeminiWeb] Checking for ready state...") + + // 1. 等待停止按鈕消失(如果存在) + _, _ = p.page.WaitForSelector("button[aria-label*='Stop'], button[aria-label*='停止']", playwright.PageWaitForSelectorOptions{ + State: playwright.WaitForSelectorStateDetached, + Timeout: playwright.Float(5000), + }) + + // 2. 嘗試多種等待策略 + inputSelectors := []string{ + ".ql-editor.ql-blank", + ".ql-editor", + "div[contenteditable='true'][role='textbox']", + "div[contenteditable='true']", + ".ProseMirror", + "rich-textarea", + "textarea", + } + + // 策略 A: 等待任一輸入框出現 + for i, sel := range inputSelectors { + fmt.Printf(" [%d/%d] Waiting for: %s\n", i+1, len(inputSelectors), sel) + locator := p.page.Locator(sel) + if err := locator.WaitFor(playwright.LocatorWaitForOptions{ + Timeout: playwright.Float(5000), + State: playwright.WaitForSelectorStateVisible, + }); err == nil { + fmt.Printf(" ✓ Input field found: %s\n", sel) + return nil + } + } + + // 策略 B: 等待頁面完全載入 + fmt.Println("[GeminiWeb] Waiting for page load...") + time.Sleep(3 * time.Second) + + // 策略 C: 使用 JavaScript 檢查 + fmt.Println("[GeminiWeb] Checking with JavaScript...") + result, err := p.page.Evaluate(` + () => { + // 檢查所有可能的輸入元素 + const selectors = [ + '.ql-editor.ql-blank', + '.ql-editor', + 'div[contenteditable="true"][role="textbox"]', + 'div[contenteditable="true"]', + '.ProseMirror', + 'rich-textarea', + 'textarea' + ]; + + for (const sel of selectors) { + const el = document.querySelector(sel); + if (el) { + return { + found: true, + selector: sel, + tagName: el.tagName, + className: el.className, + visible: el.offsetParent !== null + }; + } + } + + return { found: false }; + } + `) + + if err == nil { + if m, ok := result.(map[string]interface{}); ok { + if found, _ := m["found"].(bool); found { + sel, _ := m["selector"].(string) + fmt.Printf(" ✓ JavaScript found: %s\n", sel) + return 
nil + } + } + } + + // 策略 D: 調試模式 - 輸出頁面結構 + if p.cfg.GeminiBrowserVisible { + fmt.Println("[GeminiWeb].dump: Page structure analysis") + _, _ = p.page.Evaluate(` + () => { + const allElements = document.querySelectorAll('*'); + const inputLike = []; + for (const el of allElements) { + if (el.contentEditable === 'true' || + el.role === 'textbox' || + el.tagName === 'TEXTAREA' || + el.tagName === 'INPUT') { + inputLike.push({ + tag: el.tagName, + class: el.className, + id: el.id, + role: el.role, + contentEditable: el.contentEditable + }); + } + } + console.log('Input-like elements:', inputLike); + } + `) + } + + return fmt.Errorf("no input field found after all strategies") +} + +// isLoggedIn 檢查是否已登入 +func (p *PlaywrightProvider) isLoggedIn() bool { + // 嘗試找輸入框(登入狀態的主要特徵) + selectors := []string{ + ".ProseMirror", + "rich-textarea", + "div[role='textbox']", + "div[contenteditable='true']", + "textarea", + } + + for _, sel := range selectors { + locator := p.page.Locator(sel) + if count, _ := locator.Count(); count > 0 { + return true + } + } + return false +} + +// typeInput 輸入文字(使用 Playwright 的 Auto-wait) +func (p *PlaywrightProvider) typeInput(text string) error { + fmt.Println("[GeminiWeb] Looking for input field...") + + selectors := []string{ + ".ql-editor.ql-blank", + ".ql-editor", + "div[contenteditable='true'][role='textbox']", + "div[contenteditable='true']", + ".ProseMirror", + "rich-textarea", + "textarea", + } + + var inputLocator playwright.Locator + var found bool + + for _, sel := range selectors { + fmt.Printf(" Trying: %s\n", sel) + locator := p.page.Locator(sel) + if err := locator.WaitFor(playwright.LocatorWaitForOptions{ + Timeout: playwright.Float(3000), + }); err == nil { + inputLocator = locator + found = true + fmt.Printf(" ✓ Found with: %s\n", sel) + break + } + } + + if !found { + // 錯誤會被 Generate 的 defer 捕獲並保存診斷 + url := p.page.URL() + title, _ := p.page.Title() + return fmt.Errorf("input field not found (URL=%s, Title=%s). 
Diagnostics will be saved to /tmp/", url, title) + } + + // Focus 並填充(Playwright 自動等待) + fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text)) + if err := inputLocator.Fill(text); err != nil { + return fmt.Errorf("failed to fill: %w", err) + } + + fmt.Println("[GeminiWeb] Input complete") + return nil +} + +// sendMessage 發送訊息 +func (p *PlaywrightProvider) sendMessage() error { + // 方法 1: 按 Enter(最可靠) + if err := p.page.Keyboard().Press("Enter"); err != nil { + return fmt.Errorf("failed to press Enter: %w", err) + } + + time.Sleep(200 * time.Millisecond) + + // 方法 2: 嘗試點擊發送按鈕(補強) + _, _ = p.page.Evaluate(` + () => { + const keywords = ['發送', 'Send', '傳送']; + const buttons = Array.from(document.querySelectorAll('button, [role="button"]')); + + for (const btn of buttons) { + const text = (btn.innerText || btn.textContent || '').trim(); + const label = (btn.getAttribute('aria-label') || '').trim(); + + // 跳過停止按鈕 + if (['停止', 'Stop', '中斷'].includes(text) || label.toLowerCase().includes('stop')) { + continue; + } + + if (keywords.some(kw => text.includes(kw) || label.includes(kw))) { + btn.click(); + return true; + } + } + return false; + } + `) + + return nil +} + +// extractResponse 提取回應 +func (p *PlaywrightProvider) extractResponse() (string, error) { + var lastText string + var stableCount int + lastUpdate := time.Now() + timeout := 120 * time.Second + startTime := time.Now() + + for time.Since(startTime) < timeout { + time.Sleep(500 * time.Millisecond) + + // 使用 JavaScript 提取回應文字(更精確) + result, err := p.page.Evaluate(` + () => { + // 尋找所有可能的回應容器 + const selectors = [ + 'model-response', + '.model-response', + 'message-content', + '.message-content' + ]; + + for (const sel of selectors) { + const el = document.querySelector(sel); + if (el) { + // 嘗試找markdown內容 + const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]'); + if (markdown && markdown.innerText.trim()) { + let text = markdown.innerText.trim(); + // 移除常見的標籤前綴 + text = 
text.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim(); + return { text: text, source: sel + ' .markdown' }; + } + + // 嘗試找純文字內容(排除標籤) + let textContent = el.innerText.trim(); + if (textContent) { + // 移除常見的標籤前綴 + textContent = textContent.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim(); + return { text: textContent, source: sel }; + } + } + } + + return { text: '', source: 'none' }; + } + `) + + if err == nil { + if m, ok := result.(map[string]interface{}); ok { + text, _ := m["text"].(string) + text = strings.TrimSpace(text) + + if text != "" && len(text) > len(lastText) { + lastText = text + lastUpdate = time.Now() + stableCount = 0 + fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text)) + } + } + } + + // 檢查是否完成(需要連續 3 次穩定) + if time.Since(lastUpdate) > 500*time.Millisecond && lastText != "" { + stableCount++ + if stableCount >= 3 { + // 最終檢查:停止按鈕是否還存在 + stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']") + count, _ := stopBtn.Count() + + if count == 0 { + fmt.Println("[GeminiWeb] ✓ Response complete") + return lastText, nil + } + } + } + } + + if lastText != "" { + fmt.Println("[GeminiWeb] ✓ Response complete (timeout)") + return lastText, nil + } + return "", fmt.Errorf("response timeout") +} + +// selectModel 選擇 Gemini 模型 +// Gemini Web 只有三種模型:fast, thinking, pro +func (p *PlaywrightProvider) selectModel(model string) error { + // 映射模型名稱到 Gemini Web 的模型選擇器 + modelMap := map[string]string{ + "fast": "Fast", + "thinking": "Thinking", + "pro": "Pro", + "gemini-fast": "Fast", + "gemini-thinking": "Thinking", + "gemini-pro": "Pro", + "gemini-2.0-fast": "Fast", + "gemini-2.0-flash": "Fast", // 相容舊名稱 + "gemini-2.5-pro": "Pro", + "gemini-2.5-pro-thinking": "Thinking", + } + + // 從完整模型名稱中提取類型 + modelType := "" + modelLower := strings.ToLower(model) + for key, value := range modelMap { + if strings.Contains(modelLower, strings.ToLower(key)) 
|| modelLower == strings.ToLower(key) {
+			modelType = value
+			break
+		}
+	}
+
+	if modelType == "" {
+		// Unknown model: leave the selector alone (logged as defaulting to Fast).
+		fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
+		return nil
+	}
+
+	fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)
+
+	// Click the model selector.
+	modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
+	if count, _ := modelSelector.Count(); count > 0 {
+		if err := modelSelector.First().Click(); err != nil {
+			fmt.Printf("[GeminiWeb] Warning: could not click model selector: %v\n", err)
+		} else {
+			time.Sleep(500 * time.Millisecond)
+
+			// Pick the matching model option.
+			optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
+			if count, _ := optionSelector.Count(); count > 0 {
+				if err := optionSelector.First().Click(); err != nil {
+					fmt.Printf("[GeminiWeb] Warning: could not select model: %v\n", err)
+				} else {
+					fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
+					time.Sleep(500 * time.Millisecond)
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+// buildPromptFromMessagesPlaywright flattens messages into one prompt string; roles other than system, user and assistant are left out.
+func buildPromptFromMessagesPlaywright(messages []entity.Message) string {
+	var b strings.Builder
+	for _, m := range messages {
+		switch m.Role {
+		case "system":
+			b.WriteString("System: " + m.Content + "\n\n")
+		case "user":
+			b.WriteString(m.Content + "\n\n")
+		case "assistant":
+			b.WriteString("Assistant: " + m.Content + "\n\n")
+		}
+	}
+	return b.String()
+}
diff --git a/pkg/provider/geminiweb/pool.go b/pkg/provider/geminiweb/pool.go
new file mode 100644
index 0000000..88d4f89
--- /dev/null
+++ b/pkg/provider/geminiweb/pool.go
@@ -0,0 +1,197 @@
+package geminiweb
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+)
+
+// GeminiSession is the metadata stored as session.json in a session's
+// directory. LastUsed and RateLimitEnd are Unix times in milliseconds.
+type GeminiSession struct {
+	Name         string `json:"name"`
+	CookieFile   string `json:"cookie_file"`
+	LastUsed     int64  `json:"last_used"`
+	ActiveCount  int    `json:"active_count"`
+	RateLimitEnd int64  `json:"rate_limit_end"`
+}
+
+// SessionPool tracks the sessions kept under dir; its methods hold mu while
+// they read or change pool state.
+// NOTE(review): maxCount is stored but never read in this file; confirm
+// whether CreateSession is meant to enforce it.
+type SessionPool struct {
+	mu       sync.Mutex
+	sessions []*GeminiSession
+	dir      string
+	maxCount int
+}
+
+// NewSessionPool creates dir if it is missing and loads the sessions already
+// stored there. maxSessions is recorded on the pool.
+func NewSessionPool(dir string, maxSessions int) (*SessionPool, error) {
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create session dir: %w", err)
+	}
+
+	sessions, err := loadSessions(dir)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load sessions: %w", err)
+	}
+
+	return &SessionPool{
+		sessions: sessions,
+		dir:      dir,
+		maxCount: maxSessions,
+	}, nil
+}
+
+// loadSessions reads dir/NAME/session.json for every subdirectory of dir.
+// A subdirectory whose metadata is missing or malformed is skipped so one bad
+// entry does not fail the whole load.
+func loadSessions(dir string) ([]*GeminiSession, error) {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return nil, err
+	}
+
+	var sessions []*GeminiSession
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		metaPath := filepath.Join(dir, name, "session.json")
+		data, err := os.ReadFile(metaPath)
+		if err != nil {
+			continue
+		}
+
+		var s GeminiSession
+		if err := json.Unmarshal(data, &s); err != nil {
+			continue
+		}
+		sessions = append(sessions, &s)
+	}
+
+	return sessions, nil
+}
+
+// Count returns the number of sessions in the pool.
+func (p *SessionPool) Count() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	return len(p.sessions)
+}
+
+// GetAvailable returns the session that is not rate limited and has the
+// fewest active uses, preferring the least recently used one on a tie. It
+// returns nil when no session qualifies.
+func (p *SessionPool) GetAvailable() *GeminiSession {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	now := time.Now().UnixMilli()
+
+	var best *GeminiSession
+	for _, s := range p.sessions {
+		if s.RateLimitEnd >= now {
+			continue // still rate limited
+		}
+		if best == nil || s.ActiveCount < best.ActiveCount {
+			best = s
+		} else if s.ActiveCount == best.ActiveCount && s.LastUsed < best.LastUsed {
+			best = s
+		}
+	}
+
+	return best
+}
+
+// StartSession records one more active use of s, stamps its last-use time,
+// and persists the change.
+func (p *SessionPool) StartSession(s *GeminiSession) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	s.ActiveCount++
+	s.LastUsed = time.Now().UnixMilli()
+	p.saveSession(s)
+}
+
+// EndSession releases one active use of s, never going below zero, and
+// persists the change.
+func (p *SessionPool) EndSession(s *GeminiSession) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if s.ActiveCount > 0 {
+		s.ActiveCount--
+	}
+	p.saveSession(s)
+}
+
+// RateLimitSession keeps s out of GetAvailable until durationMs milliseconds
+// from now and persists the change.
+func (p *SessionPool) RateLimitSession(s *GeminiSession, durationMs int64) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	s.RateLimitEnd = time.Now().UnixMilli() + durationMs
+	p.saveSession(s)
+}
+
+// saveSession writes s to its session.json. Persistence stays best effort,
+// but a failure is logged instead of being dropped silently.
+func (p *SessionPool) saveSession(s *GeminiSession) {
+	metaPath := filepath.Join(p.dir, s.Name, "session.json")
+	data, err := json.MarshalIndent(s, "", " ")
+	if err != nil {
+		fmt.Printf("[GeminiWeb] Warning: could not encode session %q: %v\n", s.Name, err)
+		return
+	}
+	if err := os.WriteFile(metaPath, data, 0644); err != nil {
+		fmt.Printf("[GeminiWeb] Warning: could not save session %q: %v\n", s.Name, err)
+	}
+}
+
+// CreateSession adds a session called name, creating its directory and
+// initial metadata. When the pool already has a session with that name, that
+// entry is returned instead, so two entries never share one session.json.
+func (p *SessionPool) CreateSession(name string) (*GeminiSession, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	for _, existing := range p.sessions {
+		if existing.Name == name {
+			return existing, nil
+		}
+	}
+
+	sessionDir := filepath.Join(p.dir, name)
+	if err := os.MkdirAll(sessionDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create session dir: %w", err)
+	}
+
+	s := &GeminiSession{
+		Name:       name,
+		CookieFile: filepath.Join(sessionDir, "cookies.json"),
+		LastUsed:   time.Now().UnixMilli(),
+	}
+
+	p.sessions = append(p.sessions, s)
+	p.saveSession(s)
+
+	return s, nil
+}
+
+// GetSessionNames returns the names of all sessions in the pool.
+func (p *SessionPool) GetSessionNames() []string {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	names := make([]string, len(p.sessions))
+	for i, s := range p.sessions {
+		names[i] = s.Name
+	}
+	return names
+}
diff --git a/pkg/provider/geminiweb/provider.go b/pkg/provider/geminiweb/provider.go
new file mode 100644
index 0000000..ca3ba70
--- /dev/null
+++ b/pkg/provider/geminiweb/provider.go
@@ -0,0 +1,235 @@
+package geminiweb
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"cursor-api-proxy/internal/config"
+	"cursor-api-proxy/pkg/domain/entity"
+)
+
+// Provider drives Gemini through a browser manager that is created on first
+// use and then shared by every call on this Provider.
+type Provider struct {
+	cfg         config.BridgeConfig
+	managerOnce sync.Once
+	manager     *BrowserManager
+	managerErr  error
+}
+
+// NewProvider returns a Provider that uses cfg.
+func NewProvider(cfg config.BridgeConfig) *Provider {
+	return &Provider{cfg: cfg}
+}
+
+// Name returns the provider name.
+func (p *Provider) Name() string {
+	return "gemini-web"
+}
+
+// Close closes the browser manager if one was created.
+func (p *Provider) Close() error {
+	if p.manager != nil {
+		return p.manager.Close()
+	}
+	return nil
+}
+
+// getManager returns the browser manager, creating it on first use. The
+// result of that first attempt, including its error, is reused afterwards.
+func (p *Provider) getManager() (*BrowserManager, error) {
+	p.managerOnce.Do(func() {
+		sessionDir := p.getSessionDir()
+		p.manager, p.managerErr = GetBrowserManager(sessionDir, p.cfg.GeminiBrowserVisible)
+	})
+	return p.manager, p.managerErr
+}
+
+// getSessionDir returns the directory that holds the browser session data.
+func (p *Provider) getSessionDir() string {
+	// A single fixed session directory keeps the design simple.
+	return filepath.Join(p.cfg.GeminiAccountDir, "default-session")
+}
+
+// Generate sends the conversation to Gemini through the browser and reports
+// the reply via cb: one text chunk carrying the whole response, then a done
+// chunk. The tools argument is not used, and model is only logged. ctx is
+// checked at the points marked below; the page helpers themselves do not
+// receive it.
+func (p *Provider) Generate(ctx context.Context, model string, messages []entity.Message, tools []entity.Tool, cb func(entity.StreamChunk)) error {
+	// Do not touch the browser for a request that was already cancelled.
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+
+	fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)
+
+	// 1. Get the browser manager.
+	manager, err := p.getManager()
+	if err != nil {
+		return fmt.Errorf("failed to get browser manager: %w", err)
+	}
+
+	// 2. Launch the browser if it is not running yet.
+	if !manager.IsRunning() {
+		fmt.Printf("[GeminiWeb] Launching browser...\n")
+		if err := manager.Launch(); err != nil {
+			return fmt.Errorf("failed to launch browser: %w", err)
+		}
+	}
+
+	// 3. Get the page.
+	page, err := manager.GetPage()
+	if err != nil {
+		return fmt.Errorf("failed to get page: %w", err)
+	}
+
+	// 4. Navigate unless the page is already on Gemini. A failed Info call is
+	// treated as "not on Gemini" rather than reading a result that may be nil.
+	info, err := page.Info()
+	if err != nil || !strings.Contains(info.URL, "gemini.google.com") {
+		fmt.Printf("[GeminiWeb] Navigating to Gemini...\n")
+		if err := NavigateToGemini(page); err != nil {
+			return fmt.Errorf("failed to navigate: %w", err)
+		}
+		// Give the page time to settle, but stop waiting on cancellation.
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-time.After(2 * time.Second):
+		}
+	}
+
+	// 5. Check the login state; generation proceeds either way.
+	fmt.Printf("[GeminiWeb] Checking login status...\n")
+	if !IsLoggedIn(page) {
+		fmt.Printf("[GeminiWeb] Not logged in, continuing anyway\n")
+
+		if p.cfg.GeminiBrowserVisible {
+			fmt.Println("\n========================================")
+			fmt.Println("Browser is open. You can:")
+			fmt.Println("1. Log in to Gemini now")
+			fmt.Println("2. Continue without login")
+			fmt.Println("========================================")
+			// Separate call for the trailing blank line: go vet rejects a
+			// Println argument that itself ends in a newline.
+			fmt.Println()
+		}
+	} else {
+		fmt.Printf("[GeminiWeb] Logged in\n")
+	}
+
+	// 6. Wait for the page to be ready; a failure here is only a warning.
+	if err := WaitForReady(page); err != nil {
+		fmt.Printf("[GeminiWeb] Warning: %v\n", err)
+	}
+
+	// 7. Build the prompt.
+	prompt := buildPromptFromMessages(messages)
+
+	// Last cancellation check before anything is submitted to Gemini.
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
+
+	// 8. Type the prompt.
+	if err := TypeInput(page, prompt); err != nil {
+		return fmt.Errorf("failed to type input: %w", err)
+	}
+
+	// 9. Send.
+	fmt.Printf("[GeminiWeb] Sending message...\n")
+	if err := ClickSend(page); err != nil {
+		return fmt.Errorf("failed to send: %w", err)
+	}
+
+	// 10. Extract the response.
+	fmt.Printf("[GeminiWeb] Waiting for response...\n")
+	response, err := ExtractResponse(page)
+	if err != nil {
+		return fmt.Errorf("failed to extract response: %w", err)
+	}
+
+	// 11. Deliver the response through the stream callback.
+	cb(entity.StreamChunk{Type: entity.ChunkText, Text: response})
+	cb(entity.StreamChunk{Type: entity.ChunkDone, Done: true})
+
+	fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
+	return nil
+}
+
+// buildPromptFromMessages flattens messages into one prompt string. System
+// and assistant turns get a role prefix, user turns are added verbatim, and
+// any other role is left out.
+func buildPromptFromMessages(messages []entity.Message) string {
+	var b strings.Builder
+	for _, m := range messages {
+		switch m.Role {
+		case "system":
+			b.WriteString("System: " + m.Content + "\n\n")
+		case "user":
+			b.WriteString(m.Content + "\n\n")
+		case "assistant":
+			b.WriteString("Assistant: " + m.Content + "\n\n")
+		}
+	}
+	return b.String()
+}
+
+// RunLogin opens a visible browser on Gemini so the user can log in by hand
+// (used by the gemini-login command). It blocks until the process receives
+// an interrupt (Ctrl+C) and then closes the browser manager.
+func RunLogin(cfg config.BridgeConfig, sessionName string) error {
+	if sessionName == "" {
+		sessionName = "default-session"
+	}
+
+	sessionDir := filepath.Join(cfg.GeminiAccountDir, sessionName)
+	if err := os.MkdirAll(sessionDir, 0755); err != nil {
+		return fmt.Errorf("failed to create session dir: %w", err)
+	}
+
+	fmt.Printf("Starting browser for login. Session: %s\n", sessionName)
+	fmt.Printf("Session directory: %s\n", sessionDir)
+	fmt.Println("Please log in to your Gemini account in the browser window.")
+	fmt.Println("Press Ctrl+C when you have completed the login...")
+
+	// Take over Ctrl+C before the browser starts so an interrupt ends this
+	// function normally and the deferred Close below still runs.
+	interrupt := make(chan os.Signal, 1)
+	signal.Notify(interrupt, os.Interrupt)
+	defer signal.Stop(interrupt)
+
+	manager, err := NewBrowserManager(sessionDir, true) // visible=true
+	if err != nil {
+		return fmt.Errorf("failed to create browser manager: %w", err)
+	}
+
+	if err := manager.Launch(); err != nil {
+		return fmt.Errorf("failed to launch browser: %w", err)
+	}
+	defer manager.Close()
+
+	page, err := manager.GetPage()
+	if err != nil {
+		return fmt.Errorf("failed to get page: %w", err)
+	}
+
+	if err := NavigateToGemini(page); err != nil {
+		return fmt.Errorf("failed to navigate: %w", err)
+	}
+
+	// Wait for the user to log in manually. Without this wait the function
+	// would return at once and the deferred Close would run before any
+	// login could happen.
+	<-interrupt
+
+	return nil
+}