opencode-cursor-agent/internal/providers/geminiweb/playwright_provider.go

642 lines
18 KiB
Go
Raw Normal View History

package geminiweb
import (
"context"
"cursor-api-proxy/internal/apitypes"
"cursor-api-proxy/internal/config"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/playwright-community/playwright-go"
)
// PlaywrightProvider 使用 Playwright 的 Gemini Provider
type PlaywrightProvider struct {
cfg config.BridgeConfig
pw *playwright.Playwright
browser playwright.Browser
context playwright.BrowserContext
page playwright.Page
mu sync.Mutex
userDataDir string
}
var (
playwrightInstance *playwright.Playwright
playwrightOnce sync.Once
playwrightErr error
)
// NewPlaywrightProvider 建立新的 Playwright Provider
func NewPlaywrightProvider(cfg config.BridgeConfig) (*PlaywrightProvider, error) {
// 確保 Playwright 已初始化(單例)
playwrightOnce.Do(func() {
playwrightInstance, playwrightErr = playwright.Run()
if playwrightErr != nil {
playwrightErr = fmt.Errorf("failed to run playwright: %w", playwrightErr)
}
})
if playwrightErr != nil {
return nil, playwrightErr
}
// 清理 Chrome 鎖檔案
userDataDir := filepath.Join(cfg.GeminiAccountDir, "default-session")
cleanLockFiles(userDataDir)
// 確保目錄存在
if err := os.MkdirAll(userDataDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create user data dir: %w", err)
}
return &PlaywrightProvider{
cfg: cfg,
pw: playwrightInstance,
userDataDir: userDataDir,
}, nil
}
// getName 返回 Provider 名稱
func (p *PlaywrightProvider) Name() string {
return "gemini-web"
}
// launchIfNeeded 如果需要則啟動瀏覽器
func (p *PlaywrightProvider) launchIfNeeded() error {
p.mu.Lock()
defer p.mu.Unlock()
if p.context != nil && p.page != nil {
return nil
}
fmt.Println("[GeminiWeb] Launching Chromium...")
// 使用 LaunchPersistentContext自動保存 session
context, err := p.pw.Chromium.LaunchPersistentContext(p.userDataDir,
playwright.BrowserTypeLaunchPersistentContextOptions{
Headless: playwright.Bool(!p.cfg.GeminiBrowserVisible),
Args: []string{
"--no-first-run",
"--no-default-browser-check",
"--disable-background-networking",
"--disable-extensions",
"--disable-plugins",
"--disable-sync",
},
})
if err != nil {
return fmt.Errorf("failed to launch persistent context: %w", err)
}
p.context = context
// 取得或建立頁面
pages := context.Pages()
if len(pages) > 0 {
p.page = pages[0]
} else {
page, err := context.NewPage()
if err != nil {
_ = context.Close()
return fmt.Errorf("failed to create page: %w", err)
}
p.page = page
}
fmt.Println("[GeminiWeb] Browser launched")
return nil
}
// Generate 生成回應
func (p *PlaywrightProvider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) (err error) {
// 確保在返回錯誤時保存診斷
defer func() {
if err != nil {
fmt.Println("[GeminiWeb] Error occurred, saving diagnostics...")
_ = p.saveDiagnostics()
}
}()
fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)
// 1. 確保瀏覽器已啟動
if err := p.launchIfNeeded(); err != nil {
return fmt.Errorf("failed to launch browser: %w", err)
}
// 2. 導航到 Gemini如果需要
currentURL := p.page.URL()
if !strings.Contains(currentURL, "gemini.google.com") {
fmt.Println("[GeminiWeb] Navigating to Gemini...")
if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{
WaitUntil: playwright.WaitUntilStateDomcontentloaded,
Timeout: playwright.Float(60000),
}); err != nil {
return fmt.Errorf("failed to navigate: %w", err)
}
// 額外等待 JavaScript 載入
fmt.Println("[GeminiWeb] Waiting for page to initialize...")
time.Sleep(3 * time.Second)
}
// 3. 調試模式:等待用戶確認
if p.cfg.GeminiBrowserVisible {
fmt.Println("\n" + strings.Repeat("=", 70))
fmt.Println("🔍 調試模式:瀏覽器已開啟")
fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...")
fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*")
fmt.Println(strings.Repeat("=", 70))
var input string
fmt.Scanln(&input)
}
// 4. 等待頁面完全載入project-golem 策略)
fmt.Println("[GeminiWeb] Waiting for page to be ready...")
if err := p.waitForPageReady(); err != nil {
fmt.Printf("[GeminiWeb] Warning: %v\n", err)
// 額外調試:輸出頁面 HTML 結構
if p.cfg.GeminiBrowserVisible {
html, _ := p.page.Content()
debugPath := "/tmp/gemini-debug.html"
if err := os.WriteFile(debugPath, []byte(html), 0644); err == nil {
fmt.Printf("[GeminiWeb] HTML saved to: %s\n", debugPath)
}
}
}
// 4. 檢查登入狀態
fmt.Println("[GeminiWeb] Checking login status...")
loggedIn := p.isLoggedIn()
if !loggedIn {
fmt.Println("[GeminiWeb] Not logged in, continuing anyway")
if p.cfg.GeminiBrowserVisible {
fmt.Println("\n========================================")
fmt.Println("Browser is open. You can:")
fmt.Println("1. Log in to Gemini now")
fmt.Println("2. Continue without login")
fmt.Println("========================================\n")
}
} else {
fmt.Println("[GeminiWeb] ✓ Logged in")
}
// 5. 選擇模型(如果支援)
if err := p.selectModel(model); err != nil {
fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err)
}
// 6. 建構提示詞
prompt := buildPromptFromMessagesPlaywright(messages)
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
// 7. 輸入文字(使用 Playwright 的 Auto-wait
if err := p.typeInput(prompt); err != nil {
return fmt.Errorf("failed to type: %w", err)
}
// 7. 發送訊息
fmt.Println("[GeminiWeb] Sending message...")
if err := p.sendMessage(); err != nil {
return fmt.Errorf("failed to send: %w", err)
}
// 8. 提取回應
fmt.Println("[GeminiWeb] Waiting for response...")
response, err := p.extractResponse()
if err != nil {
return fmt.Errorf("failed to extract response: %w", err)
}
// 9. 回調
cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: response})
cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})
fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
return nil
}
// Close 關閉 Provider
func (p *PlaywrightProvider) Close() error {
p.mu.Lock()
defer p.mu.Unlock()
if p.context != nil {
if err := p.context.Close(); err != nil {
return err
}
p.context = nil
p.page = nil
}
return nil
}
// saveDiagnostics 保存診斷信息
func (p *PlaywrightProvider) saveDiagnostics() error {
if p.page == nil {
return fmt.Errorf("no page available")
}
// 截圖
screenshotPath := "/tmp/gemini-debug.png"
if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{
Path: playwright.String(screenshotPath),
}); err == nil {
fmt.Printf("[GeminiWeb] Screenshot saved: %s\n", screenshotPath)
}
// HTML
htmlPath := "/tmp/gemini-debug.html"
if html, err := p.page.Content(); err == nil {
if err := os.WriteFile(htmlPath, []byte(html), 0644); err == nil {
fmt.Printf("[GeminiWeb] HTML saved: %s\n", htmlPath)
}
}
// 輸出頁面信息
url := p.page.URL()
title, _ := p.page.Title()
fmt.Printf("[GeminiWeb] Diagnostics: URL=%s, Title=%s\n", url, title)
return nil
}
// waitForPageReady 等待頁面完全就緒project-golem 策略)
func (p *PlaywrightProvider) waitForPageReady() error {
fmt.Println("[GeminiWeb] Checking for ready state...")
// 1. 等待停止按鈕消失(如果存在)
_, _ = p.page.WaitForSelector("button[aria-label*='Stop'], button[aria-label*='停止']", playwright.PageWaitForSelectorOptions{
State: playwright.WaitForSelectorStateDetached,
Timeout: playwright.Float(5000),
})
// 2. 嘗試多種等待策略
inputSelectors := []string{
".ql-editor.ql-blank",
".ql-editor",
"div[contenteditable='true'][role='textbox']",
"div[contenteditable='true']",
".ProseMirror",
"rich-textarea",
"textarea",
}
// 策略 A: 等待任一輸入框出現
for i, sel := range inputSelectors {
fmt.Printf(" [%d/%d] Waiting for: %s\n", i+1, len(inputSelectors), sel)
locator := p.page.Locator(sel)
if err := locator.WaitFor(playwright.LocatorWaitForOptions{
Timeout: playwright.Float(5000),
State: playwright.WaitForSelectorStateVisible,
}); err == nil {
fmt.Printf(" ✓ Input field found: %s\n", sel)
return nil
}
}
// 策略 B: 等待頁面完全載入
fmt.Println("[GeminiWeb] Waiting for page load...")
time.Sleep(3 * time.Second)
// 策略 C: 使用 JavaScript 檢查
fmt.Println("[GeminiWeb] Checking with JavaScript...")
result, err := p.page.Evaluate(`
() => {
// 檢查所有可能的輸入元素
const selectors = [
'.ql-editor.ql-blank',
'.ql-editor',
'div[contenteditable="true"][role="textbox"]',
'div[contenteditable="true"]',
'.ProseMirror',
'rich-textarea',
'textarea'
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el) {
return {
found: true,
selector: sel,
tagName: el.tagName,
className: el.className,
visible: el.offsetParent !== null
};
}
}
return { found: false };
}
`)
if err == nil {
if m, ok := result.(map[string]interface{}); ok {
if found, _ := m["found"].(bool); found {
sel, _ := m["selector"].(string)
fmt.Printf(" ✓ JavaScript found: %s\n", sel)
return nil
}
}
}
// 策略 D: 調試模式 - 輸出頁面結構
if p.cfg.GeminiBrowserVisible {
fmt.Println("[GeminiWeb].dump: Page structure analysis")
_, _ = p.page.Evaluate(`
() => {
const allElements = document.querySelectorAll('*');
const inputLike = [];
for (const el of allElements) {
if (el.contentEditable === 'true' ||
el.role === 'textbox' ||
el.tagName === 'TEXTAREA' ||
el.tagName === 'INPUT') {
inputLike.push({
tag: el.tagName,
class: el.className,
id: el.id,
role: el.role,
contentEditable: el.contentEditable
});
}
}
console.log('Input-like elements:', inputLike);
}
`)
}
return fmt.Errorf("no input field found after all strategies")
}
// isLoggedIn 檢查是否已登入
func (p *PlaywrightProvider) isLoggedIn() bool {
// 嘗試找輸入框(登入狀態的主要特徵)
selectors := []string{
".ProseMirror",
"rich-textarea",
"div[role='textbox']",
"div[contenteditable='true']",
"textarea",
}
for _, sel := range selectors {
locator := p.page.Locator(sel)
if count, _ := locator.Count(); count > 0 {
return true
}
}
return false
}
// typeInput 輸入文字(使用 Playwright 的 Auto-wait
func (p *PlaywrightProvider) typeInput(text string) error {
fmt.Println("[GeminiWeb] Looking for input field...")
selectors := []string{
".ql-editor.ql-blank",
".ql-editor",
"div[contenteditable='true'][role='textbox']",
"div[contenteditable='true']",
".ProseMirror",
"rich-textarea",
"textarea",
}
var inputLocator playwright.Locator
var found bool
for _, sel := range selectors {
fmt.Printf(" Trying: %s\n", sel)
locator := p.page.Locator(sel)
if err := locator.WaitFor(playwright.LocatorWaitForOptions{
Timeout: playwright.Float(3000),
}); err == nil {
inputLocator = locator
found = true
fmt.Printf(" ✓ Found with: %s\n", sel)
break
}
}
if !found {
// 錯誤會被 Generate 的 defer 捕獲並保存診斷
url := p.page.URL()
title, _ := p.page.Title()
return fmt.Errorf("input field not found (URL=%s, Title=%s). Diagnostics will be saved to /tmp/", url, title)
}
// Focus 並填充Playwright 自動等待)
fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text))
if err := inputLocator.Fill(text); err != nil {
return fmt.Errorf("failed to fill: %w", err)
}
fmt.Println("[GeminiWeb] Input complete")
return nil
}
// sendMessage 發送訊息
func (p *PlaywrightProvider) sendMessage() error {
// 方法 1: 按 Enter最可靠
if err := p.page.Keyboard().Press("Enter"); err != nil {
return fmt.Errorf("failed to press Enter: %w", err)
}
time.Sleep(200 * time.Millisecond)
// 方法 2: 嘗試點擊發送按鈕(補強)
_, _ = p.page.Evaluate(`
() => {
const keywords = ['發送', 'Send', '傳送'];
const buttons = Array.from(document.querySelectorAll('button, [role="button"]'));
for (const btn of buttons) {
const text = (btn.innerText || btn.textContent || '').trim();
const label = (btn.getAttribute('aria-label') || '').trim();
// 跳過停止按鈕
if (['停止', 'Stop', '中斷'].includes(text) || label.toLowerCase().includes('stop')) {
continue;
}
if (keywords.some(kw => text.includes(kw) || label.includes(kw))) {
btn.click();
return true;
}
}
return false;
}
`)
return nil
}
// extractResponse 提取回應
func (p *PlaywrightProvider) extractResponse() (string, error) {
var lastText string
var stableCount int
lastUpdate := time.Now()
timeout := 120 * time.Second
startTime := time.Now()
for time.Since(startTime) < timeout {
time.Sleep(500 * time.Millisecond)
// 使用 JavaScript 提取回應文字(更精確)
result, err := p.page.Evaluate(`
() => {
// 尋找所有可能的回應容器
const selectors = [
'model-response',
'.model-response',
'message-content',
'.message-content'
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el) {
// 嘗試找markdown內容
const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]');
if (markdown && markdown.innerText.trim()) {
let text = markdown.innerText.trim();
// 移除常見的標籤前綴
text = text.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[:]\s*\n*/i, '').trim();
return { text: text, source: sel + ' .markdown' };
}
// 嘗試找純文字內容(排除標籤)
let textContent = el.innerText.trim();
if (textContent) {
// 移除常見的標籤前綴
textContent = textContent.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[:]\s*\n*/i, '').trim();
return { text: textContent, source: sel };
}
}
}
return { text: '', source: 'none' };
}
`)
if err == nil {
if m, ok := result.(map[string]interface{}); ok {
text, _ := m["text"].(string)
text = strings.TrimSpace(text)
if text != "" && len(text) > len(lastText) {
lastText = text
lastUpdate = time.Now()
stableCount = 0
fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text))
}
}
}
// 檢查是否完成(需要連續 3 次穩定)
if time.Since(lastUpdate) > 500*time.Millisecond && lastText != "" {
stableCount++
if stableCount >= 3 {
// 最終檢查:停止按鈕是否還存在
stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']")
count, _ := stopBtn.Count()
if count == 0 {
fmt.Println("[GeminiWeb] ✓ Response complete")
return lastText, nil
}
}
}
}
if lastText != "" {
fmt.Println("[GeminiWeb] ✓ Response complete (timeout)")
return lastText, nil
}
return "", fmt.Errorf("response timeout")
}
// selectModel 選擇 Gemini 模型
// Gemini Web 只有三種模型fast, thinking, pro
func (p *PlaywrightProvider) selectModel(model string) error {
// 映射模型名稱到 Gemini Web 的模型選擇器
modelMap := map[string]string{
"fast": "Fast",
"thinking": "Thinking",
"pro": "Pro",
"gemini-fast": "Fast",
"gemini-thinking": "Thinking",
"gemini-pro": "Pro",
"gemini-2.0-fast": "Fast",
"gemini-2.0-flash": "Fast", // 相容舊名稱
"gemini-2.5-pro": "Pro",
"gemini-2.5-pro-thinking": "Thinking",
}
// 從完整模型名稱中提取類型
modelType := ""
modelLower := strings.ToLower(model)
for key, value := range modelMap {
if strings.Contains(modelLower, strings.ToLower(key)) || modelLower == strings.ToLower(key) {
modelType = value
break
}
}
if modelType == "" {
// 默認使用 Fast
fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
return nil
}
fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)
// 點擊模型選擇器
modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
if count, _ := modelSelector.Count(); count > 0 {
if err := modelSelector.First().Click(); err != nil {
fmt.Printf("[GeminiWeb] Warning: could not click model selector: %v\n", err)
} else {
time.Sleep(500 * time.Millisecond)
// 選擇對應的模型選項
optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
if count, _ := optionSelector.Count(); count > 0 {
if err := optionSelector.First().Click(); err != nil {
fmt.Printf("[GeminiWeb] Warning: could not select model: %v\n", err)
} else {
fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
time.Sleep(500 * time.Millisecond)
}
}
}
}
return nil
}
// buildPromptFromMessages 從訊息列表建構提示詞
func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
var prompt string
for _, m := range messages {
switch m.Role {
case "system":
prompt += "System: " + m.Content + "\n\n"
case "user":
prompt += m.Content + "\n\n"
case "assistant":
prompt += "Assistant: " + m.Content + "\n\n"
}
}
return prompt
}