feature/gemini-web-provider #1

Merged
daniel.w merged 16 commits from feature/gemini-web-provider into master 2026-04-02 18:36:51 +00:00
3 changed files with 154 additions and 65 deletions
Showing only changes of commit 4517b07f33 - Show all commits

View File

@ -59,8 +59,15 @@ func HandleGeminiChatCompletions(w http.ResponseWriter, r *http.Request, cfg con
logger.LogRequestStart(method, pathname, rawModel, cfg.TimeoutMs, isStream) logger.LogRequestStart(method, pathname, rawModel, cfg.TimeoutMs, isStream)
start := time.Now().UnixMilli() start := time.Now().UnixMilli()
// 創建 Gemini provider // 創建 Gemini provider (使用 Playwright)
provider := geminiweb.NewProvider(cfg) provider, provErr := geminiweb.NewPlaywrightProvider(cfg)
if provErr != nil {
logger.LogAgentError(cfg.SessionsLogPath, method, pathname, remoteAddress, -1, provErr.Error())
httputil.WriteJSON(w, 500, map[string]interface{}{
"error": map[string]string{"message": provErr.Error(), "code": "provider_error"},
}, nil)
return
}
if isStream { if isStream {
httputil.WriteSSEHeaders(w, nil) httputil.WriteSSEHeaders(w, nil)

View File

@ -25,7 +25,6 @@ func NewProvider(cfg config.BridgeConfig) (Provider, error) {
case "cursor": case "cursor":
return cursor.NewProvider(cfg), nil return cursor.NewProvider(cfg), nil
case "gemini-web": case "gemini-web":
// 使用新的 Playwright provider
return geminiweb.NewPlaywrightProvider(cfg) return geminiweb.NewPlaywrightProvider(cfg)
default: default:
return nil, fmt.Errorf("unknown provider: %s", providerType) return nil, fmt.Errorf("unknown provider: %s", providerType)

View File

@ -135,38 +135,26 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message
if !strings.Contains(currentURL, "gemini.google.com") { if !strings.Contains(currentURL, "gemini.google.com") {
fmt.Println("[GeminiWeb] Navigating to Gemini...") fmt.Println("[GeminiWeb] Navigating to Gemini...")
if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{ if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{
WaitUntil: playwright.WaitUntilStateNetworkidle, WaitUntil: playwright.WaitUntilStateDomcontentloaded,
Timeout: playwright.Float(30000), Timeout: playwright.Float(60000),
}); err != nil { }); err != nil {
return fmt.Errorf("failed to navigate: %w", err) return fmt.Errorf("failed to navigate: %w", err)
} }
// 額外等待 JavaScript 載入
fmt.Println("[GeminiWeb] Waiting for page to initialize...")
time.Sleep(3 * time.Second)
} }
// 3. 調試模式:如果可見,等待用戶確認 // 3. 調試模式:等待用戶確認
if p.cfg.GeminiBrowserVisible { if p.cfg.GeminiBrowserVisible {
fmt.Println("\n" + strings.Repeat("=", 60)) fmt.Println("\n" + strings.Repeat("=", 70))
fmt.Println("DEBUG MODE: Browser is visible") fmt.Println("🔍 調試模式:瀏覽器已開啟")
fmt.Println("Please check the browser and press ENTER when ready...") fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...")
fmt.Println("If you see login page, please log in first") fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*")
fmt.Println(strings.Repeat("=", 60)) fmt.Println(strings.Repeat("=", 70))
// 等待用戶按 Enter
var input string var input string
fmt.Scanln(&input) fmt.Scanln(&input)
// 截圖保存
screenshotPath := "/tmp/gemini-debug.png"
if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{
Path: playwright.String(screenshotPath),
}); err == nil {
fmt.Printf("[GeminiWeb] Screenshot saved to: %s\n", screenshotPath)
}
// 輸出頁面信息
fmt.Printf("[GeminiWeb] Current URL: %s\n", p.page.URL())
if title, err := p.page.Title(); err == nil {
fmt.Printf("[GeminiWeb] Page Title: %s\n", title)
}
} }
// 4. 等待頁面完全載入project-golem 策略) // 4. 等待頁面完全載入project-golem 策略)
@ -197,14 +185,19 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message
fmt.Println("========================================\n") fmt.Println("========================================\n")
} }
} else { } else {
fmt.Println("[GeminiWeb] Logged in") fmt.Println("[GeminiWeb] Logged in")
} }
// 5. 建構提示詞 // 5. 選擇模型(如果支援)
if err := p.selectModel(model); err != nil {
fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err)
}
// 6. 建構提示詞
prompt := buildPromptFromMessagesPlaywright(messages) prompt := buildPromptFromMessagesPlaywright(messages)
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt)) fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
// 6. 輸入文字(使用 Playwright 的 Auto-wait // 7. 輸入文字(使用 Playwright 的 Auto-wait
if err := p.typeInput(prompt); err != nil { if err := p.typeInput(prompt); err != nil {
return fmt.Errorf("failed to type: %w", err) return fmt.Errorf("failed to type: %w", err)
} }
@ -287,12 +280,13 @@ func (p *PlaywrightProvider) waitForPageReady() error {
// 2. 嘗試多種等待策略 // 2. 嘗試多種等待策略
inputSelectors := []string{ inputSelectors := []string{
".ql-editor.ql-blank",
".ql-editor",
"div[contenteditable='true'][role='textbox']",
"div[contenteditable='true']",
".ProseMirror", ".ProseMirror",
"rich-textarea", "rich-textarea",
"div[role='textbox']",
"div[contenteditable='true']",
"textarea", "textarea",
"input[type='text']",
} }
// 策略 A: 等待任一輸入框出現 // 策略 A: 等待任一輸入框出現
@ -318,12 +312,13 @@ func (p *PlaywrightProvider) waitForPageReady() error {
() => { () => {
// 檢查所有可能的輸入元素 // 檢查所有可能的輸入元素
const selectors = [ const selectors = [
'.ql-editor.ql-blank',
'.ql-editor',
'div[contenteditable="true"][role="textbox"]',
'div[contenteditable="true"]',
'.ProseMirror', '.ProseMirror',
'rich-textarea', 'rich-textarea',
'div[role="textbox"]', 'textarea'
'div[contenteditable="true"]',
'textarea',
'input[type="text"]'
]; ];
for (const sel of selectors) { for (const sel of selectors) {
@ -407,10 +402,12 @@ func (p *PlaywrightProvider) typeInput(text string) error {
fmt.Println("[GeminiWeb] Looking for input field...") fmt.Println("[GeminiWeb] Looking for input field...")
selectors := []string{ selectors := []string{
".ql-editor.ql-blank",
".ql-editor",
"div[contenteditable='true'][role='textbox']",
"div[contenteditable='true']",
".ProseMirror", ".ProseMirror",
"rich-textarea", "rich-textarea",
"div[role='textbox'][contenteditable='true']",
"div[contenteditable='true']",
"textarea", "textarea",
} }
@ -485,15 +482,8 @@ func (p *PlaywrightProvider) sendMessage() error {
// extractResponse 提取回應 // extractResponse 提取回應
func (p *PlaywrightProvider) extractResponse() (string, error) { func (p *PlaywrightProvider) extractResponse() (string, error) {
selectors := []string{
".model-response-text",
".message-content",
".markdown",
".prose",
"model-response",
}
var lastText string var lastText string
var stableCount int
lastUpdate := time.Now() lastUpdate := time.Now()
timeout := 120 * time.Second timeout := 120 * time.Second
startTime := time.Now() startTime := time.Now()
@ -501,46 +491,139 @@ func (p *PlaywrightProvider) extractResponse() (string, error) {
for time.Since(startTime) < timeout { for time.Since(startTime) < timeout {
time.Sleep(500 * time.Millisecond) time.Sleep(500 * time.Millisecond)
// 嘗試所有選擇器 // 使用 JavaScript 提取回應文字(更精確)
for _, sel := range selectors { result, err := p.page.Evaluate(`
locator := p.page.Locator(sel) () => {
count, _ := locator.Count() // 尋找所有可能的回應容器
const selectors = [
'model-response',
'.model-response',
'message-content',
'.message-content'
];
if count > 0 { for (const sel of selectors) {
// 取最後一個元素 const el = document.querySelector(sel);
lastEl := locator.Last() if (el) {
text, err := lastEl.TextContent() // 嘗試找markdown內容
if err != nil { const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]');
continue if (markdown && markdown.innerText.trim()) {
let text = markdown.innerText.trim();
// 移除常見的標籤前綴
text = text.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[:]\s*\n*/i, '').trim();
return { text: text, source: sel + ' .markdown' };
}
// 嘗試找純文字內容(排除標籤)
let textContent = el.innerText.trim();
if (textContent) {
// 移除常見的標籤前綴
textContent = textContent.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[:]\s*\n*/i, '').trim();
return { text: textContent, source: sel };
}
}
} }
return { text: '', source: 'none' };
}
`)
if err == nil {
if m, ok := result.(map[string]interface{}); ok {
text, _ := m["text"].(string)
text = strings.TrimSpace(text) text = strings.TrimSpace(text)
if text != "" && len(text) > len(lastText) { if text != "" && len(text) > len(lastText) {
lastText = text lastText = text
lastUpdate = time.Now() lastUpdate = time.Now()
fmt.Printf("[GeminiWeb] Response length: %d\n", len(text)) stableCount = 0
fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text))
} }
} }
} }
// 檢查是否完成2秒內無新內容 // 檢查是否完成(需要連續 3 次穩定)
if time.Since(lastUpdate) > 2*time.Second && lastText != "" { if time.Since(lastUpdate) > 500*time.Millisecond && lastText != "" {
// 最終檢查:停止按鈕是否還存在 stableCount++
stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止']") if stableCount >= 3 {
count, _ := stopBtn.Count() // 最終檢查:停止按鈕是否還存在
stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']")
count, _ := stopBtn.Count()
if count == 0 { if count == 0 {
return lastText, nil fmt.Println("[GeminiWeb] ✓ Response complete")
return lastText, nil
}
} }
} }
} }
if lastText != "" { if lastText != "" {
fmt.Println("[GeminiWeb] ✓ Response complete (timeout)")
return lastText, nil return lastText, nil
} }
return "", fmt.Errorf("response timeout") return "", fmt.Errorf("response timeout")
} }
// resolveGeminiWebModel maps a requested model name to the label used for it
// in the Gemini Web model picker ("Fast", "Thinking" or "Pro"). Matching is a
// case-insensitive substring test. It returns "" when no alias matches.
//
// The rules are kept in an ordered slice rather than a map: Go randomizes map
// iteration order, so a name that contains several aliases (for example
// "gemini-2.5-pro-thinking" contains both "pro" and "thinking") would
// otherwise resolve to a different label from one call to the next. The more
// specific capability is listed first so that such names resolve to Thinking.
//
// Longer aliases such as "gemini-pro" or "gemini-2.5-pro-thinking" need no
// entries of their own: each of them contains one of the tokens below, so a
// substring match on the short token already covers them.
func resolveGeminiWebModel(model string) string {
	rules := []struct{ token, label string }{
		{"thinking", "Thinking"},
		{"pro", "Pro"},
		{"fast", "Fast"},
		{"gemini-2.0-flash", "Fast"}, // legacy name kept for compatibility
	}

	name := strings.ToLower(model)
	for _, r := range rules {
		if strings.Contains(name, r.token) {
			return r.label
		}
	}
	return ""
}

// selectModel picks the Gemini Web model that corresponds to the requested
// model name by clicking through the model picker on the current page.
// Gemini Web only offers three models: Fast, Thinking and Pro.
//
// An unrecognized model name falls back to Fast, and Fast is then actually
// selected so that the log message is truthful and the picker does not keep
// whatever option it was showing before this call.
//
// Selection is best-effort: a missing picker or a failed click is reported as
// a warning on stdout and the function still returns nil, so the caller can
// carry on with whatever model the page currently has selected.
func (p *PlaywrightProvider) selectModel(model string) error {
	modelType := resolveGeminiWebModel(model)
	if modelType == "" {
		modelType = "Fast"
		fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
	}

	fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)

	// Open the model picker, if the page has one.
	modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
	count, err := modelSelector.Count()
	if err != nil {
		fmt.Printf("[GeminiWeb] Warning: could not look up model selector: %v\n", err)
		return nil
	}
	if count == 0 {
		// No picker on this page: nothing to select.
		return nil
	}
	if err := modelSelector.First().Click(); err != nil {
		fmt.Printf("[GeminiWeb] Warning: could not click model selector: %v\n", err)
		return nil
	}
	// Give the menu a moment to open before querying its options.
	time.Sleep(500 * time.Millisecond)

	// Click the menu entry whose text matches the resolved label. modelType is
	// always one of the fixed labels above, so it is safe to embed in the
	// selector string.
	optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
	count, err = optionSelector.Count()
	if err != nil {
		fmt.Printf("[GeminiWeb] Warning: could not look up model option: %v\n", err)
		return nil
	}
	if count == 0 {
		fmt.Printf("[GeminiWeb] Warning: model option not found: %s\n", modelType)
		return nil
	}
	if err := optionSelector.First().Click(); err != nil {
		fmt.Printf("[GeminiWeb] Warning: could not select model: %v\n", err)
		return nil
	}

	fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
	// Let the UI settle after the selection before the caller types.
	time.Sleep(500 * time.Millisecond)
	return nil
}
// buildPromptFromMessages 從訊息列表建構提示詞 // buildPromptFromMessages 從訊息列表建構提示詞
func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string { func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
var prompt string var prompt string