package geminiweb

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"cursor-api-proxy/internal/apitypes"
	"cursor-api-proxy/internal/config"
	"github.com/playwright-community/playwright-go"
)

// PlaywrightProvider is a Gemini provider that drives the Gemini web UI
// through a Playwright-controlled Chromium persistent context.
type PlaywrightProvider struct {
	cfg         config.BridgeConfig
	pw          *playwright.Playwright
	browser     playwright.Browser
	context     playwright.BrowserContext
	page        playwright.Page
	mu          sync.Mutex // guards context and page in launchIfNeeded and Close
	userDataDir string
}

// Process-wide Playwright driver, started at most once.
var (
	playwrightInstance *playwright.Playwright
	playwrightOnce     sync.Once
	playwrightErr      error
)

// NewPlaywrightProvider creates a provider backed by the shared Playwright
// driver. The driver is started on the first call only; if that start fails,
// the same error is returned by every later call. The browser itself is not
// launched here (see launchIfNeeded).
//
// The Chromium profile lives in "<cfg.GeminiAccountDir>/default-session";
// the directory is created if missing.
func NewPlaywrightProvider(cfg config.BridgeConfig) (*PlaywrightProvider, error) {
	// Make sure Playwright is initialised (singleton).
	playwrightOnce.Do(func() {
		playwrightInstance, playwrightErr = playwright.Run()
		if playwrightErr != nil {
			playwrightErr = fmt.Errorf("failed to run playwright: %w", playwrightErr)
		}
	})
	if playwrightErr != nil {
		return nil, playwrightErr
	}

	// Clean up Chrome lock files in the profile directory.
	userDataDir := filepath.Join(cfg.GeminiAccountDir, "default-session")
	cleanLockFiles(userDataDir)

	// Make sure the profile directory exists.
	if err := os.MkdirAll(userDataDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create user data dir: %w", err)
	}

	return &PlaywrightProvider{
		cfg:         cfg,
		pw:          playwrightInstance,
		userDataDir: userDataDir,
	}, nil
}

// Name returns the provider name, "gemini-web".
func (p *PlaywrightProvider) Name() string {
	return "gemini-web"
}

// launchIfNeeded starts Chromium with a persistent context (so the session is
// kept in userDataDir) unless a context and page are already available.
// The provider's context and page are only set once both have been obtained,
// so a failed launch never leaves a closed context behind.
func (p *PlaywrightProvider) launchIfNeeded() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	if p.context != nil && p.page != nil {
		return nil
	}

	fmt.Println("[GeminiWeb] Launching Chromium...")

	bctx, err := p.pw.Chromium.LaunchPersistentContext(p.userDataDir, playwright.BrowserTypeLaunchPersistentContextOptions{
		Headless: playwright.Bool(!p.cfg.GeminiBrowserVisible),
		Args: []string{
			"--no-first-run",
			"--no-default-browser-check",
			"--disable-background-networking",
			"--disable-extensions",
			"--disable-plugins",
			"--disable-sync",
		},
	})
	if err != nil {
		return fmt.Errorf("failed to launch persistent context: %w", err)
	}

	// Reuse the first existing page, or create one.
	var page playwright.Page
	if pages := bctx.Pages(); len(pages) > 0 {
		page = pages[0]
	} else {
		page, err = bctx.NewPage()
		if err != nil {
			// Best-effort cleanup; the page creation error is the one reported.
			_ = bctx.Close()
			return fmt.Errorf("failed to create page: %w", err)
		}
	}

	p.context = bctx
	p.page = page

	fmt.Println("[GeminiWeb] Browser launched")
	return nil
}

// sleepCtx pauses for d, returning early with ctx.Err() if ctx is cancelled.
func (p *PlaywrightProvider) sleepCtx(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// Generate sends the conversation in messages to the Gemini web UI and
// delivers the reply through cb: one ChunkText chunk carrying the whole
// response, followed by one ChunkDone chunk.
//
// tools is accepted but not used. When cfg.GeminiBrowserVisible is set the
// call blocks until a line is read from stdin, so the operator can inspect
// the browser first. On any returned error a screenshot and HTML dump are
// written by saveDiagnostics. Cancelling ctx aborts the waits performed here
// and the response polling loop.
func (p *PlaywrightProvider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) (err error) {
	// Save diagnostics whenever an error is returned.
	defer func() {
		if err != nil {
			fmt.Println("[GeminiWeb] Error occurred, saving diagnostics...")
			_ = p.saveDiagnostics() // best effort; the original error is what matters
		}
	}()

	if err := ctx.Err(); err != nil {
		return err
	}

	fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)

	// 1. Make sure the browser is running.
	if err := p.launchIfNeeded(); err != nil {
		return fmt.Errorf("failed to launch browser: %w", err)
	}

	// 2. Navigate to Gemini if the page is not already there.
	currentURL := p.page.URL()
	if !strings.Contains(currentURL, "gemini.google.com") {
		fmt.Println("[GeminiWeb] Navigating to Gemini...")
		if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{
			WaitUntil: playwright.WaitUntilStateDomcontentloaded,
			Timeout:   playwright.Float(60000),
		}); err != nil {
			return fmt.Errorf("failed to navigate: %w", err)
		}

		// Give the page's JavaScript extra time to load.
		fmt.Println("[GeminiWeb] Waiting for page to initialize...")
		if err := p.sleepCtx(ctx, 3*time.Second); err != nil {
			return err
		}
	}

	// 3. Debug mode: wait for the operator to confirm.
	if p.cfg.GeminiBrowserVisible {
		fmt.Println("\n" + strings.Repeat("=", 70))
		fmt.Println("🔍 調試模式:瀏覽器已開啟")
		fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...")
		fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*")
		fmt.Println(strings.Repeat("=", 70))

		var input string
		// A read error (e.g. closed stdin) simply means we continue right away.
		_, _ = fmt.Scanln(&input)
	}

	// 4. Wait for the page to be fully ready (project-golem strategy).
	fmt.Println("[GeminiWeb] Waiting for page to be ready...")
	if err := p.waitForPageReady(); err != nil {
		fmt.Printf("[GeminiWeb] Warning: %v\n", err)

		// Extra debugging: dump the page HTML.
		if p.cfg.GeminiBrowserVisible {
			html, _ := p.page.Content()
			debugPath := "/tmp/gemini-debug.html"
			if err := os.WriteFile(debugPath, []byte(html), 0644); err == nil {
				fmt.Printf("[GeminiWeb] HTML saved to: %s\n", debugPath)
			}
		}
	}

	// 5. Check login status; generation is attempted either way.
	fmt.Println("[GeminiWeb] Checking login status...")
	if p.isLoggedIn() {
		fmt.Println("[GeminiWeb] ✓ Logged in")
	} else {
		fmt.Println("[GeminiWeb] Not logged in, continuing anyway")
		if p.cfg.GeminiBrowserVisible {
			fmt.Println("\n========================================")
			fmt.Println("Browser is open. You can:")
			fmt.Println("1. Log in to Gemini now")
			fmt.Println("2. Continue without login")
			fmt.Println("========================================")
			fmt.Println() // trailing blank line, kept out of the Println argument for go vet
		}
	}

	// 6. Select the model (if supported); failure is only a warning.
	if err := p.selectModel(model); err != nil {
		fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err)
	}

	// 7. Build the prompt.
	prompt := buildPromptFromMessagesPlaywright(messages)
	fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))

	// 8. Type the prompt (relies on Playwright auto-wait).
	if err := p.typeInput(prompt); err != nil {
		return fmt.Errorf("failed to type: %w", err)
	}

	// Remember how many responses the conversation already holds so that the
	// reply to this prompt can be told apart from earlier turns.
	baseline := p.countResponses()

	// 9. Send the message.
	fmt.Println("[GeminiWeb] Sending message...")
	if err := p.sendMessage(); err != nil {
		return fmt.Errorf("failed to send: %w", err)
	}

	// 10. Extract the response.
	fmt.Println("[GeminiWeb] Waiting for response...")
	response, err := p.extractResponse(ctx, baseline)
	if err != nil {
		return fmt.Errorf("failed to extract response: %w", err)
	}

	// 11. Deliver the result.
	cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: response})
	cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})

	fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
	return nil
}

// Close closes the browser context, if one is open, and clears the cached
// context and page. If closing fails the error is returned and the cached
// state is left untouched. The shared Playwright driver is not stopped.
func (p *PlaywrightProvider) Close() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	if p.context != nil {
		if err := p.context.Close(); err != nil {
			return err
		}
		p.context = nil
		p.page = nil
	}
	return nil
}

// saveDiagnostics writes a screenshot to /tmp/gemini-debug.png and the page
// HTML to /tmp/gemini-debug.html, then prints the current URL and title.
// Each step is best effort; the only error returned is for a missing page.
func (p *PlaywrightProvider) saveDiagnostics() error {
	if p.page == nil {
		return fmt.Errorf("no page available")
	}

	// Screenshot.
	screenshotPath := "/tmp/gemini-debug.png"
	if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{
		Path: playwright.String(screenshotPath),
	}); err == nil {
		fmt.Printf("[GeminiWeb] Screenshot saved: %s\n", screenshotPath)
	}

	// HTML.
	htmlPath := "/tmp/gemini-debug.html"
	if html, err := p.page.Content(); err == nil {
		if err := os.WriteFile(htmlPath, []byte(html), 0644); err == nil {
			fmt.Printf("[GeminiWeb] HTML saved: %s\n", htmlPath)
		}
	}

	// Page information.
	url := p.page.URL()
	title, _ := p.page.Title()
	fmt.Printf("[GeminiWeb] Diagnostics: URL=%s, Title=%s\n", url, title)

	return nil
}

// waitForPageReady waits until a prompt input element is present
// (project-golem strategy). It returns nil as soon as one of the known input
// selectors becomes visible or is found via a JavaScript probe, and an error
// if every strategy fails.
func (p *PlaywrightProvider) waitForPageReady() error {
	fmt.Println("[GeminiWeb] Checking for ready state...")

	// 1. Wait for any stop button to disappear; a timeout here is not fatal.
	_, _ = p.page.WaitForSelector("button[aria-label*='Stop'], button[aria-label*='停止']", playwright.PageWaitForSelectorOptions{
		State:   playwright.WaitForSelectorStateDetached,
		Timeout: playwright.Float(5000),
	})

	// 2. Try several strategies in turn.
	inputSelectors := []string{
		".ql-editor.ql-blank",
		".ql-editor",
		"div[contenteditable='true'][role='textbox']",
		"div[contenteditable='true']",
		".ProseMirror",
		"rich-textarea",
		"textarea",
	}

	// Strategy A: wait for any input element to become visible.
	for i, sel := range inputSelectors {
		fmt.Printf(" [%d/%d] Waiting for: %s\n", i+1, len(inputSelectors), sel)
		// First() keeps the wait valid when the selector matches several
		// elements; locator operations are strict otherwise.
		locator := p.page.Locator(sel).First()
		if err := locator.WaitFor(playwright.LocatorWaitForOptions{
			Timeout: playwright.Float(5000),
			State:   playwright.WaitForSelectorStateVisible,
		}); err == nil {
			fmt.Printf(" ✓ Input field found: %s\n", sel)
			return nil
		}
	}

	// Strategy B: give the page more time to finish loading.
	fmt.Println("[GeminiWeb] Waiting for page load...")
	time.Sleep(3 * time.Second)

	// Strategy C: probe the DOM with JavaScript.
	fmt.Println("[GeminiWeb] Checking with JavaScript...")
	result, err := p.page.Evaluate(`
		() => {
			// 檢查所有可能的輸入元素
			const selectors = [
				'.ql-editor.ql-blank',
				'.ql-editor',
				'div[contenteditable="true"][role="textbox"]',
				'div[contenteditable="true"]',
				'.ProseMirror',
				'rich-textarea',
				'textarea'
			];
			for (const sel of selectors) {
				const el = document.querySelector(sel);
				if (el) {
					return {
						found: true,
						selector: sel,
						tagName: el.tagName,
						className: el.className,
						visible: el.offsetParent !== null
					};
				}
			}
			return { found: false };
		}
	`)
	if err == nil {
		if m, ok := result.(map[string]interface{}); ok {
			if found, _ := m["found"].(bool); found {
				sel, _ := m["selector"].(string)
				fmt.Printf(" ✓ JavaScript found: %s\n", sel)
				return nil
			}
		}
	}

	// Strategy D: debug mode - log input-like elements to the browser console.
	if p.cfg.GeminiBrowserVisible {
		fmt.Println("[GeminiWeb].dump: Page structure analysis")
		_, _ = p.page.Evaluate(`
			() => {
				const allElements = document.querySelectorAll('*');
				const inputLike = [];
				for (const el of allElements) {
					if (el.contentEditable === 'true' || el.role === 'textbox' || el.tagName === 'TEXTAREA' || el.tagName === 'INPUT') {
						inputLike.push({
							tag: el.tagName,
							class: el.className,
							id: el.id,
							role: el.role,
							contentEditable: el.contentEditable
						});
					}
				}
				console.log('Input-like elements:', inputLike);
			}
		`)
	}

	return fmt.Errorf("no input field found after all strategies")
}

// isLoggedIn reports whether a prompt input element exists on the page,
// which this provider uses as its indicator of a logged-in session.
func (p *PlaywrightProvider) isLoggedIn() bool {
	selectors := []string{
		".ProseMirror",
		"rich-textarea",
		"div[role='textbox']",
		"div[contenteditable='true']",
		"textarea",
	}

	for _, sel := range selectors {
		// A lookup error yields a zero count and is treated as "not found".
		if count, _ := p.page.Locator(sel).Count(); count > 0 {
			return true
		}
	}
	return false
}

// typeInput finds the prompt input by trying the known selectors in order
// (3 s each) and fills it with text. It returns an error if no input is found
// or the fill fails.
func (p *PlaywrightProvider) typeInput(text string) error {
	fmt.Println("[GeminiWeb] Looking for input field...")

	selectors := []string{
		".ql-editor.ql-blank",
		".ql-editor",
		"div[contenteditable='true'][role='textbox']",
		"div[contenteditable='true']",
		".ProseMirror",
		"rich-textarea",
		"textarea",
	}

	var inputLocator playwright.Locator
	var found bool

	for _, sel := range selectors {
		fmt.Printf(" Trying: %s\n", sel)
		// First() avoids a strict-mode failure in WaitFor/Fill when the
		// selector matches more than one element.
		locator := p.page.Locator(sel).First()
		if err := locator.WaitFor(playwright.LocatorWaitForOptions{
			Timeout: playwright.Float(3000),
		}); err == nil {
			inputLocator = locator
			found = true
			fmt.Printf(" ✓ Found with: %s\n", sel)
			break
		}
	}

	if !found {
		// Generate's deferred handler picks this error up and saves diagnostics.
		url := p.page.URL()
		title, _ := p.page.Title()
		return fmt.Errorf("input field not found (URL=%s, Title=%s). Diagnostics will be saved to /tmp/", url, title)
	}

	// Fill focuses the element and waits for it automatically.
	fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text))
	if err := inputLocator.Fill(text); err != nil {
		return fmt.Errorf("failed to fill: %w", err)
	}

	fmt.Println("[GeminiWeb] Input complete")
	return nil
}

// sendMessage submits the prompt by pressing Enter and then, as a fallback,
// clicks a button whose text or aria-label contains a "send" keyword
// (buttons that look like a stop button are skipped). Only a failure of the
// Enter key press is reported.
func (p *PlaywrightProvider) sendMessage() error {
	// Method 1: press Enter.
	if err := p.page.Keyboard().Press("Enter"); err != nil {
		return fmt.Errorf("failed to press Enter: %w", err)
	}

	time.Sleep(200 * time.Millisecond)

	// Method 2: try clicking the send button as a reinforcement; best effort.
	_, _ = p.page.Evaluate(`
		() => {
			const keywords = ['發送', 'Send', '傳送'];
			const buttons = Array.from(document.querySelectorAll('button, [role="button"]'));
			for (const btn of buttons) {
				const text = (btn.innerText || btn.textContent || '').trim();
				const label = (btn.getAttribute('aria-label') || '').trim();
				// 跳過停止按鈕
				if (['停止', 'Stop', '中斷'].includes(text) || label.toLowerCase().includes('stop')) {
					continue;
				}
				if (keywords.some(kw => text.includes(kw) || label.includes(kw))) {
					btn.click();
					return true;
				}
			}
			return false;
		}
	`)

	return nil
}

// countResponses returns how many response containers are currently on the
// page, using the first selector (in the same order extractResponse uses)
// that matches anything. It returns 0 if none match or the probe fails, in
// which case extractResponse falls back to reading the last container.
func (p *PlaywrightProvider) countResponses() int {
	result, err := p.page.Evaluate(`
		() => {
			const selectors = [
				'model-response',
				'.model-response',
				'message-content',
				'.message-content'
			];
			for (const sel of selectors) {
				const n = document.querySelectorAll(sel).length;
				if (n > 0) {
					return n;
				}
			}
			return 0;
		}
	`)
	if err != nil {
		return 0
	}

	// The numeric type of a decoded JS number is handled defensively.
	switch n := result.(type) {
	case int:
		return n
	case int32:
		return int(n)
	case int64:
		return int(n)
	case float64:
		return int(n)
	}
	return 0
}

// extractResponse polls the page every 500 ms, for up to 120 s, for the text
// of the newest response container and returns it once it has stopped
// changing for three consecutive polls and no stop button is present.
//
// baseline is the number of response containers that existed before the
// prompt was sent; containers up to that count belong to earlier turns and
// are ignored, so a previous answer is never returned as the new one. If the
// timeout is reached, whatever text was captured is returned; with no text at
// all the result is a "response timeout" error. Cancelling ctx returns
// ctx.Err().
func (p *PlaywrightProvider) extractResponse(ctx context.Context, baseline int) (string, error) {
	const (
		pollInterval = 500 * time.Millisecond
		timeout      = 120 * time.Second
	)

	var lastText string
	var stableCount int
	lastUpdate := time.Now()
	startTime := time.Now()

	for time.Since(startTime) < timeout {
		if err := p.sleepCtx(ctx, pollInterval); err != nil {
			return "", err
		}

		result, err := p.page.Evaluate(`
			(baseline) => {
				const selectors = [
					'model-response',
					'.model-response',
					'message-content',
					'.message-content'
				];
				// Strip common label prefixes from the response text.
				const clean = (s) => s.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim();

				for (const sel of selectors) {
					const els = document.querySelectorAll(sel);
					if (els.length === 0) {
						continue;
					}
					if (els.length <= baseline) {
						// Only responses from earlier turns exist so far.
						return { text: '', source: 'none' };
					}
					const el = els[els.length - 1];

					// Prefer the rendered markdown content.
					const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]');
					if (markdown && markdown.innerText.trim()) {
						return { text: clean(markdown.innerText.trim()), source: sel + ' .markdown' };
					}

					// Fall back to the container's plain text.
					const textContent = el.innerText.trim();
					if (textContent) {
						return { text: clean(textContent), source: sel };
					}
					return { text: '', source: 'none' };
				}
				return { text: '', source: 'none' };
			}
		`, baseline)

		if err == nil {
			if m, ok := result.(map[string]interface{}); ok {
				text, _ := m["text"].(string)
				text = strings.TrimSpace(text)
				// Track the latest text, not just the longest one seen, so a
				// rewritten (shorter) answer is not lost.
				if text != "" && text != lastText {
					lastText = text
					lastUpdate = time.Now()
					stableCount = 0
					fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text))
				}
			}
		}

		// Completion requires three consecutive stable polls.
		if time.Since(lastUpdate) > pollInterval && lastText != "" {
			stableCount++
			if stableCount >= 3 {
				// Final check: generation is over only when no stop button is
				// present. A failed lookup is treated as "not yet".
				stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']")
				if count, err := stopBtn.Count(); err == nil && count == 0 {
					fmt.Println("[GeminiWeb] ✓ Response complete")
					return lastText, nil
				}
			}
		}
	}

	if lastText != "" {
		fmt.Println("[GeminiWeb] ✓ Response complete (timeout)")
		return lastText, nil
	}
	return "", fmt.Errorf("response timeout")
}

// playwrightModelType maps a requested model name to the label used in the
// Gemini web model picker ("Fast", "Thinking" or "Pro"), or "" if the name
// contains none of the known aliases.
//
// Aliases are matched case-insensitively as substrings in a fixed order,
// "thinking" names first and the bare "pro" last, so that a name such as
// "gemini-2.5-pro-thinking" always resolves to "Thinking".
func playwrightModelType(model string) string {
	aliases := []struct{ key, label string }{
		{"gemini-2.5-pro-thinking", "Thinking"},
		{"gemini-thinking", "Thinking"},
		{"thinking", "Thinking"},
		{"gemini-2.0-fast", "Fast"},
		{"gemini-2.0-flash", "Fast"}, // kept for compatibility with the old name
		{"gemini-fast", "Fast"},
		{"fast", "Fast"},
		{"gemini-2.5-pro", "Pro"},
		{"gemini-pro", "Pro"},
		{"pro", "Pro"},
	}

	lower := strings.ToLower(model)
	for _, a := range aliases {
		if strings.Contains(lower, a.key) {
			return a.label
		}
	}
	return ""
}

// selectModel picks the Gemini web model matching model through the page's
// model selector. Gemini Web offers three models: fast, thinking and pro.
//
// Unknown model names leave the current selection untouched. UI failures are
// printed as warnings; the method currently always returns nil.
func (p *PlaywrightProvider) selectModel(model string) error {
	modelType := playwrightModelType(model)
	if modelType == "" {
		fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
		return nil
	}

	fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)

	// Open the model selector, if the page has one.
	modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
	if count, _ := modelSelector.Count(); count == 0 {
		return nil
	}
	if err := modelSelector.First().Click(); err != nil {
		fmt.Printf("[GeminiWeb] Warning: could not click model selector: %v\n", err)
		return nil
	}
	time.Sleep(500 * time.Millisecond)

	// Click the option carrying the model label.
	optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
	if count, _ := optionSelector.Count(); count == 0 {
		return nil
	}
	if err := optionSelector.First().Click(); err != nil {
		fmt.Printf("[GeminiWeb] Warning: could not select model: %v\n", err)
		return nil
	}

	fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
	time.Sleep(500 * time.Millisecond)
	return nil
}

// buildPromptFromMessagesPlaywright flattens messages into a single prompt.
// System and assistant messages are prefixed with "System: " and
// "Assistant: "; user messages are added as-is. Every message is followed by
// a blank line, and messages with any other role are skipped.
func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
	var b strings.Builder
	for _, m := range messages {
		switch m.Role {
		case "system":
			b.WriteString("System: " + m.Content + "\n\n")
		case "user":
			b.WriteString(m.Content + "\n\n")
		case "assistant":
			b.WriteString("Assistant: " + m.Content + "\n\n")
		}
	}
	return b.String()
}