From 4517b07f339e293f5e84e03ee45dc34f3968c59b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=80=A7=E9=A9=8A?= Date: Fri, 3 Apr 2026 02:28:47 +0800 Subject: [PATCH] feat: Implement Gemini Web provider with model selection - Fix handler to use PlaywrightProvider instead of old Provider - Add model selection for Gemini Web (fast/thinking/pro) - Improve response extraction to remove 'Gemini said' prefix - Add progress logging for headless mode - Fix navigation to use Domcontentloaded instead of Networkidle - Add proper input field selectors (.ql-editor) - Improve response completion detection with stability check --- internal/handlers/gemini_handler.go | 11 +- internal/providers/factory.go | 1 - .../geminiweb/playwright_provider.go | 207 ++++++++++++------ 3 files changed, 154 insertions(+), 65 deletions(-) diff --git a/internal/handlers/gemini_handler.go b/internal/handlers/gemini_handler.go index 8a11483..04d7065 100644 --- a/internal/handlers/gemini_handler.go +++ b/internal/handlers/gemini_handler.go @@ -59,8 +59,15 @@ func HandleGeminiChatCompletions(w http.ResponseWriter, r *http.Request, cfg con logger.LogRequestStart(method, pathname, rawModel, cfg.TimeoutMs, isStream) start := time.Now().UnixMilli() - // 創建 Gemini provider - provider := geminiweb.NewProvider(cfg) + // 創建 Gemini provider (使用 Playwright) + provider, provErr := geminiweb.NewPlaywrightProvider(cfg) + if provErr != nil { + logger.LogAgentError(cfg.SessionsLogPath, method, pathname, remoteAddress, -1, provErr.Error()) + httputil.WriteJSON(w, 500, map[string]interface{}{ + "error": map[string]string{"message": provErr.Error(), "code": "provider_error"}, + }, nil) + return + } if isStream { httputil.WriteSSEHeaders(w, nil) diff --git a/internal/providers/factory.go b/internal/providers/factory.go index c55fdc3..63cf767 100644 --- a/internal/providers/factory.go +++ b/internal/providers/factory.go @@ -25,7 +25,6 @@ func NewProvider(cfg config.BridgeConfig) (Provider, error) { case "cursor": 
return cursor.NewProvider(cfg), nil case "gemini-web": - // 使用新的 Playwright provider return geminiweb.NewPlaywrightProvider(cfg) default: return nil, fmt.Errorf("unknown provider: %s", providerType) diff --git a/internal/providers/geminiweb/playwright_provider.go b/internal/providers/geminiweb/playwright_provider.go index 213f85e..ced9dfa 100644 --- a/internal/providers/geminiweb/playwright_provider.go +++ b/internal/providers/geminiweb/playwright_provider.go @@ -135,38 +135,26 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message if !strings.Contains(currentURL, "gemini.google.com") { fmt.Println("[GeminiWeb] Navigating to Gemini...") if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{ - WaitUntil: playwright.WaitUntilStateNetworkidle, - Timeout: playwright.Float(30000), + WaitUntil: playwright.WaitUntilStateDomcontentloaded, + Timeout: playwright.Float(60000), }); err != nil { return fmt.Errorf("failed to navigate: %w", err) } + // 額外等待 JavaScript 載入 + fmt.Println("[GeminiWeb] Waiting for page to initialize...") + time.Sleep(3 * time.Second) } - // 3. 調試模式:如果可見,等待用戶確認 + // 3. 
調試模式:等待用戶確認 if p.cfg.GeminiBrowserVisible { - fmt.Println("\n" + strings.Repeat("=", 60)) - fmt.Println("DEBUG MODE: Browser is visible") - fmt.Println("Please check the browser and press ENTER when ready...") - fmt.Println("If you see login page, please log in first") - fmt.Println(strings.Repeat("=", 60)) + fmt.Println("\n" + strings.Repeat("=", 70)) + fmt.Println("🔍 調試模式:瀏覽器已開啟") + fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...") + fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*") + fmt.Println(strings.Repeat("=", 70)) - // 等待用戶按 Enter var input string fmt.Scanln(&input) - - // 截圖保存 - screenshotPath := "/tmp/gemini-debug.png" - if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{ - Path: playwright.String(screenshotPath), - }); err == nil { - fmt.Printf("[GeminiWeb] Screenshot saved to: %s\n", screenshotPath) - } - - // 輸出頁面信息 - fmt.Printf("[GeminiWeb] Current URL: %s\n", p.page.URL()) - if title, err := p.page.Title(); err == nil { - fmt.Printf("[GeminiWeb] Page Title: %s\n", title) - } } // 4. 等待頁面完全載入(project-golem 策略) @@ -197,14 +185,19 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message fmt.Println("========================================\n") } } else { - fmt.Println("[GeminiWeb] Logged in") + fmt.Println("[GeminiWeb] ✓ Logged in") } - // 5. 建構提示詞 + // 5. 選擇模型(如果支援) + if err := p.selectModel(model); err != nil { + fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err) + } + + // 6. 建構提示詞 prompt := buildPromptFromMessagesPlaywright(messages) fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt)) - // 6. 輸入文字(使用 Playwright 的 Auto-wait) + // 7. 輸入文字(使用 Playwright 的 Auto-wait) if err := p.typeInput(prompt); err != nil { return fmt.Errorf("failed to type: %w", err) } @@ -287,12 +280,13 @@ func (p *PlaywrightProvider) waitForPageReady() error { // 2. 
嘗試多種等待策略 inputSelectors := []string{ + ".ql-editor.ql-blank", + ".ql-editor", + "div[contenteditable='true'][role='textbox']", + "div[contenteditable='true']", ".ProseMirror", "rich-textarea", - "div[role='textbox']", - "div[contenteditable='true']", "textarea", - "input[type='text']", } // 策略 A: 等待任一輸入框出現 @@ -318,12 +312,13 @@ func (p *PlaywrightProvider) waitForPageReady() error { () => { // 檢查所有可能的輸入元素 const selectors = [ + '.ql-editor.ql-blank', + '.ql-editor', + 'div[contenteditable="true"][role="textbox"]', + 'div[contenteditable="true"]', '.ProseMirror', 'rich-textarea', - 'div[role="textbox"]', - 'div[contenteditable="true"]', - 'textarea', - 'input[type="text"]' + 'textarea' ]; for (const sel of selectors) { @@ -407,10 +402,12 @@ func (p *PlaywrightProvider) typeInput(text string) error { fmt.Println("[GeminiWeb] Looking for input field...") selectors := []string{ + ".ql-editor.ql-blank", + ".ql-editor", + "div[contenteditable='true'][role='textbox']", + "div[contenteditable='true']", ".ProseMirror", "rich-textarea", - "div[role='textbox'][contenteditable='true']", - "div[contenteditable='true']", "textarea", } @@ -485,15 +482,8 @@ func (p *PlaywrightProvider) sendMessage() error { // extractResponse 提取回應 func (p *PlaywrightProvider) extractResponse() (string, error) { - selectors := []string{ - ".model-response-text", - ".message-content", - ".markdown", - ".prose", - "model-response", - } - var lastText string + var stableCount int lastUpdate := time.Now() timeout := 120 * time.Second startTime := time.Now() @@ -501,46 +491,139 @@ func (p *PlaywrightProvider) extractResponse() (string, error) { for time.Since(startTime) < timeout { time.Sleep(500 * time.Millisecond) - // 嘗試所有選擇器 - for _, sel := range selectors { - locator := p.page.Locator(sel) - count, _ := locator.Count() - - if count > 0 { - // 取最後一個元素 - lastEl := locator.Last() - text, err := lastEl.TextContent() - if err != nil { - continue + // 使用 JavaScript 提取回應文字(更精確) + result, err := 
p.page.Evaluate(` + () => { + // 尋找所有可能的回應容器 + const selectors = [ + 'model-response', + '.model-response', + 'message-content', + '.message-content' + ]; + + for (const sel of selectors) { + const el = document.querySelector(sel); + if (el) { + // 嘗試找markdown內容 + const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]'); + if (markdown && markdown.innerText.trim()) { + let text = markdown.innerText.trim(); + // 移除常見的標籤前綴 + text = text.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim(); + return { text: text, source: sel + ' .markdown' }; + } + + // 嘗試找純文字內容(排除標籤) + let textContent = el.innerText.trim(); + if (textContent) { + // 移除常見的標籤前綴 + textContent = textContent.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim(); + return { text: textContent, source: sel }; + } + } } + + return { text: '', source: 'none' }; + } + `) + if err == nil { + if m, ok := result.(map[string]interface{}); ok { + text, _ := m["text"].(string) text = strings.TrimSpace(text) + if text != "" && len(text) > len(lastText) { lastText = text lastUpdate = time.Now() - fmt.Printf("[GeminiWeb] Response length: %d\n", len(text)) + stableCount = 0 + fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text)) } } } - // 檢查是否完成(2秒內無新內容) - if time.Since(lastUpdate) > 2*time.Second && lastText != "" { - // 最終檢查:停止按鈕是否還存在 - stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止']") - count, _ := stopBtn.Count() + // 檢查是否完成(需要連續 3 次穩定) + if time.Since(lastUpdate) > 500*time.Millisecond && lastText != "" { + stableCount++ + if stableCount >= 3 { + // 最終檢查:停止按鈕是否還存在 + stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']") + count, _ := stopBtn.Count() - if count == 0 { - return lastText, nil + if count == 0 { + fmt.Println("[GeminiWeb] ✓ Response complete") + return lastText, nil + } } } } if lastText != "" { + fmt.Println("[GeminiWeb] ✓ Response 
complete (timeout)")
 		return lastText, nil
 	}
 
 	return "", fmt.Errorf("response timeout")
 }
 
+// selectModel switches the Gemini Web UI to the tier named by model.
+//
+// model is matched case-insensitively by substring against a fixed,
+// ordered keyword table, so the result is the same on every call:
+// "thinking" wins over "pro" (so "gemini-2.5-pro-thinking" resolves to
+// Thinking), and "pro" wins over "fast". A name that matches no keyword is
+// logged and nothing is clicked.
+//
+// A page without a model selector is not an error. Once the selector has
+// been found, a failed click or a missing menu option is returned as an
+// error.
+func (p *PlaywrightProvider) selectModel(model string) error {
+	// Ordered from highest to lowest priority. A slice is used instead of a
+	// map because Go map iteration order is unspecified, and a model name
+	// can contain more than one keyword.
+	tiers := []struct {
+		keyword string
+		label   string
+	}{
+		{"thinking", "Thinking"},
+		{"pro", "Pro"},
+		{"fast", "Fast"},
+		{"gemini-2.0-flash", "Fast"}, // legacy name kept for compatibility
+	}
+
+	modelLower := strings.ToLower(model)
+	modelType := ""
+	for _, t := range tiers {
+		if strings.Contains(modelLower, t.keyword) {
+			modelType = t.label
+			break
+		}
+	}
+
+	if modelType == "" {
+		// Nothing is clicked on this path; only the message below is emitted.
+		fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
+		return nil
+	}
+
+	fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)
+
+	// Open the model selector. A Count error is treated like "no match".
+	modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
+	if count, _ := modelSelector.Count(); count == 0 {
+		// No selector on this page: model selection is best-effort.
+		return nil
+	}
+	if err := modelSelector.First().Click(); err != nil {
+		return fmt.Errorf("clicking model selector: %w", err)
+	}
+	time.Sleep(500 * time.Millisecond) // give the menu time to open
+
+	// Pick the entry whose text contains the tier label.
+	optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
+	if count, _ := optionSelector.Count(); count == 0 {
+		return fmt.Errorf("model option %q not found in selector menu", modelType)
+	}
+	if err := optionSelector.First().Click(); err != nil {
+		return fmt.Errorf("selecting model %q: %w", modelType, err)
+	}
+
+	fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
+	time.Sleep(500 * time.Millisecond) // let the UI settle before typing
+	return nil
+}
+
 // buildPromptFromMessages 從訊息列表建構提示詞
 func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
 	var prompt string