opencode-cursor-agent/internal/providers/geminiweb/playwright_provider.go

642 lines
18 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package geminiweb
import (
"context"
"cursor-api-proxy/internal/apitypes"
"cursor-api-proxy/internal/config"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/playwright-community/playwright-go"
)
// PlaywrightProvider 使用 Playwright 的 Gemini Provider
type PlaywrightProvider struct {
cfg config.BridgeConfig
pw *playwright.Playwright
browser playwright.Browser
context playwright.BrowserContext
page playwright.Page
mu sync.Mutex
userDataDir string
}
var (
playwrightInstance *playwright.Playwright
playwrightOnce sync.Once
playwrightErr error
)
// NewPlaywrightProvider 建立新的 Playwright Provider
func NewPlaywrightProvider(cfg config.BridgeConfig) (*PlaywrightProvider, error) {
// 確保 Playwright 已初始化(單例)
playwrightOnce.Do(func() {
playwrightInstance, playwrightErr = playwright.Run()
if playwrightErr != nil {
playwrightErr = fmt.Errorf("failed to run playwright: %w", playwrightErr)
}
})
if playwrightErr != nil {
return nil, playwrightErr
}
// 清理 Chrome 鎖檔案
userDataDir := filepath.Join(cfg.GeminiAccountDir, "default-session")
cleanLockFiles(userDataDir)
// 確保目錄存在
if err := os.MkdirAll(userDataDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create user data dir: %w", err)
}
return &PlaywrightProvider{
cfg: cfg,
pw: playwrightInstance,
userDataDir: userDataDir,
}, nil
}
// getName 返回 Provider 名稱
func (p *PlaywrightProvider) Name() string {
return "gemini-web"
}
// launchIfNeeded 如果需要則啟動瀏覽器
func (p *PlaywrightProvider) launchIfNeeded() error {
p.mu.Lock()
defer p.mu.Unlock()
if p.context != nil && p.page != nil {
return nil
}
fmt.Println("[GeminiWeb] Launching Chromium...")
// 使用 LaunchPersistentContext自動保存 session
context, err := p.pw.Chromium.LaunchPersistentContext(p.userDataDir,
playwright.BrowserTypeLaunchPersistentContextOptions{
Headless: playwright.Bool(!p.cfg.GeminiBrowserVisible),
Args: []string{
"--no-first-run",
"--no-default-browser-check",
"--disable-background-networking",
"--disable-extensions",
"--disable-plugins",
"--disable-sync",
},
})
if err != nil {
return fmt.Errorf("failed to launch persistent context: %w", err)
}
p.context = context
// 取得或建立頁面
pages := context.Pages()
if len(pages) > 0 {
p.page = pages[0]
} else {
page, err := context.NewPage()
if err != nil {
_ = context.Close()
return fmt.Errorf("failed to create page: %w", err)
}
p.page = page
}
fmt.Println("[GeminiWeb] Browser launched")
return nil
}
// Generate 生成回應
func (p *PlaywrightProvider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) (err error) {
// 確保在返回錯誤時保存診斷
defer func() {
if err != nil {
fmt.Println("[GeminiWeb] Error occurred, saving diagnostics...")
_ = p.saveDiagnostics()
}
}()
fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)
// 1. 確保瀏覽器已啟動
if err := p.launchIfNeeded(); err != nil {
return fmt.Errorf("failed to launch browser: %w", err)
}
// 2. 導航到 Gemini如果需要
currentURL := p.page.URL()
if !strings.Contains(currentURL, "gemini.google.com") {
fmt.Println("[GeminiWeb] Navigating to Gemini...")
if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{
WaitUntil: playwright.WaitUntilStateDomcontentloaded,
Timeout: playwright.Float(60000),
}); err != nil {
return fmt.Errorf("failed to navigate: %w", err)
}
// 額外等待 JavaScript 載入
fmt.Println("[GeminiWeb] Waiting for page to initialize...")
time.Sleep(3 * time.Second)
}
// 3. 調試模式:等待用戶確認
if p.cfg.GeminiBrowserVisible {
fmt.Println("\n" + strings.Repeat("=", 70))
fmt.Println("🔍 調試模式:瀏覽器已開啟")
fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...")
fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*")
fmt.Println(strings.Repeat("=", 70))
var input string
fmt.Scanln(&input)
}
// 4. 等待頁面完全載入project-golem 策略)
fmt.Println("[GeminiWeb] Waiting for page to be ready...")
if err := p.waitForPageReady(); err != nil {
fmt.Printf("[GeminiWeb] Warning: %v\n", err)
// 額外調試:輸出頁面 HTML 結構
if p.cfg.GeminiBrowserVisible {
html, _ := p.page.Content()
debugPath := "/tmp/gemini-debug.html"
if err := os.WriteFile(debugPath, []byte(html), 0644); err == nil {
fmt.Printf("[GeminiWeb] HTML saved to: %s\n", debugPath)
}
}
}
// 4. 檢查登入狀態
fmt.Println("[GeminiWeb] Checking login status...")
loggedIn := p.isLoggedIn()
if !loggedIn {
fmt.Println("[GeminiWeb] Not logged in, continuing anyway")
if p.cfg.GeminiBrowserVisible {
fmt.Println("\n========================================")
fmt.Println("Browser is open. You can:")
fmt.Println("1. Log in to Gemini now")
fmt.Println("2. Continue without login")
fmt.Println("========================================\n")
}
} else {
fmt.Println("[GeminiWeb] ✓ Logged in")
}
// 5. 選擇模型(如果支援)
if err := p.selectModel(model); err != nil {
fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err)
}
// 6. 建構提示詞
prompt := buildPromptFromMessagesPlaywright(messages)
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
// 7. 輸入文字(使用 Playwright 的 Auto-wait
if err := p.typeInput(prompt); err != nil {
return fmt.Errorf("failed to type: %w", err)
}
// 7. 發送訊息
fmt.Println("[GeminiWeb] Sending message...")
if err := p.sendMessage(); err != nil {
return fmt.Errorf("failed to send: %w", err)
}
// 8. 提取回應
fmt.Println("[GeminiWeb] Waiting for response...")
response, err := p.extractResponse()
if err != nil {
return fmt.Errorf("failed to extract response: %w", err)
}
// 9. 回調
cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: response})
cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})
fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
return nil
}
// Close 關閉 Provider
func (p *PlaywrightProvider) Close() error {
p.mu.Lock()
defer p.mu.Unlock()
if p.context != nil {
if err := p.context.Close(); err != nil {
return err
}
p.context = nil
p.page = nil
}
return nil
}
// saveDiagnostics 保存診斷信息
func (p *PlaywrightProvider) saveDiagnostics() error {
if p.page == nil {
return fmt.Errorf("no page available")
}
// 截圖
screenshotPath := "/tmp/gemini-debug.png"
if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{
Path: playwright.String(screenshotPath),
}); err == nil {
fmt.Printf("[GeminiWeb] Screenshot saved: %s\n", screenshotPath)
}
// HTML
htmlPath := "/tmp/gemini-debug.html"
if html, err := p.page.Content(); err == nil {
if err := os.WriteFile(htmlPath, []byte(html), 0644); err == nil {
fmt.Printf("[GeminiWeb] HTML saved: %s\n", htmlPath)
}
}
// 輸出頁面信息
url := p.page.URL()
title, _ := p.page.Title()
fmt.Printf("[GeminiWeb] Diagnostics: URL=%s, Title=%s\n", url, title)
return nil
}
// waitForPageReady 等待頁面完全就緒project-golem 策略)
func (p *PlaywrightProvider) waitForPageReady() error {
fmt.Println("[GeminiWeb] Checking for ready state...")
// 1. 等待停止按鈕消失(如果存在)
_, _ = p.page.WaitForSelector("button[aria-label*='Stop'], button[aria-label*='停止']", playwright.PageWaitForSelectorOptions{
State: playwright.WaitForSelectorStateDetached,
Timeout: playwright.Float(5000),
})
// 2. 嘗試多種等待策略
inputSelectors := []string{
".ql-editor.ql-blank",
".ql-editor",
"div[contenteditable='true'][role='textbox']",
"div[contenteditable='true']",
".ProseMirror",
"rich-textarea",
"textarea",
}
// 策略 A: 等待任一輸入框出現
for i, sel := range inputSelectors {
fmt.Printf(" [%d/%d] Waiting for: %s\n", i+1, len(inputSelectors), sel)
locator := p.page.Locator(sel)
if err := locator.WaitFor(playwright.LocatorWaitForOptions{
Timeout: playwright.Float(5000),
State: playwright.WaitForSelectorStateVisible,
}); err == nil {
fmt.Printf(" ✓ Input field found: %s\n", sel)
return nil
}
}
// 策略 B: 等待頁面完全載入
fmt.Println("[GeminiWeb] Waiting for page load...")
time.Sleep(3 * time.Second)
// 策略 C: 使用 JavaScript 檢查
fmt.Println("[GeminiWeb] Checking with JavaScript...")
result, err := p.page.Evaluate(`
() => {
// 檢查所有可能的輸入元素
const selectors = [
'.ql-editor.ql-blank',
'.ql-editor',
'div[contenteditable="true"][role="textbox"]',
'div[contenteditable="true"]',
'.ProseMirror',
'rich-textarea',
'textarea'
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el) {
return {
found: true,
selector: sel,
tagName: el.tagName,
className: el.className,
visible: el.offsetParent !== null
};
}
}
return { found: false };
}
`)
if err == nil {
if m, ok := result.(map[string]interface{}); ok {
if found, _ := m["found"].(bool); found {
sel, _ := m["selector"].(string)
fmt.Printf(" ✓ JavaScript found: %s\n", sel)
return nil
}
}
}
// 策略 D: 調試模式 - 輸出頁面結構
if p.cfg.GeminiBrowserVisible {
fmt.Println("[GeminiWeb].dump: Page structure analysis")
_, _ = p.page.Evaluate(`
() => {
const allElements = document.querySelectorAll('*');
const inputLike = [];
for (const el of allElements) {
if (el.contentEditable === 'true' ||
el.role === 'textbox' ||
el.tagName === 'TEXTAREA' ||
el.tagName === 'INPUT') {
inputLike.push({
tag: el.tagName,
class: el.className,
id: el.id,
role: el.role,
contentEditable: el.contentEditable
});
}
}
console.log('Input-like elements:', inputLike);
}
`)
}
return fmt.Errorf("no input field found after all strategies")
}
// isLoggedIn 檢查是否已登入
func (p *PlaywrightProvider) isLoggedIn() bool {
// 嘗試找輸入框(登入狀態的主要特徵)
selectors := []string{
".ProseMirror",
"rich-textarea",
"div[role='textbox']",
"div[contenteditable='true']",
"textarea",
}
for _, sel := range selectors {
locator := p.page.Locator(sel)
if count, _ := locator.Count(); count > 0 {
return true
}
}
return false
}
// typeInput 輸入文字(使用 Playwright 的 Auto-wait
func (p *PlaywrightProvider) typeInput(text string) error {
fmt.Println("[GeminiWeb] Looking for input field...")
selectors := []string{
".ql-editor.ql-blank",
".ql-editor",
"div[contenteditable='true'][role='textbox']",
"div[contenteditable='true']",
".ProseMirror",
"rich-textarea",
"textarea",
}
var inputLocator playwright.Locator
var found bool
for _, sel := range selectors {
fmt.Printf(" Trying: %s\n", sel)
locator := p.page.Locator(sel)
if err := locator.WaitFor(playwright.LocatorWaitForOptions{
Timeout: playwright.Float(3000),
}); err == nil {
inputLocator = locator
found = true
fmt.Printf(" ✓ Found with: %s\n", sel)
break
}
}
if !found {
// 錯誤會被 Generate 的 defer 捕獲並保存診斷
url := p.page.URL()
title, _ := p.page.Title()
return fmt.Errorf("input field not found (URL=%s, Title=%s). Diagnostics will be saved to /tmp/", url, title)
}
// Focus 並填充Playwright 自動等待)
fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text))
if err := inputLocator.Fill(text); err != nil {
return fmt.Errorf("failed to fill: %w", err)
}
fmt.Println("[GeminiWeb] Input complete")
return nil
}
// sendMessage 發送訊息
func (p *PlaywrightProvider) sendMessage() error {
// 方法 1: 按 Enter最可靠
if err := p.page.Keyboard().Press("Enter"); err != nil {
return fmt.Errorf("failed to press Enter: %w", err)
}
time.Sleep(200 * time.Millisecond)
// 方法 2: 嘗試點擊發送按鈕(補強)
_, _ = p.page.Evaluate(`
() => {
const keywords = ['發送', 'Send', '傳送'];
const buttons = Array.from(document.querySelectorAll('button, [role="button"]'));
for (const btn of buttons) {
const text = (btn.innerText || btn.textContent || '').trim();
const label = (btn.getAttribute('aria-label') || '').trim();
// 跳過停止按鈕
if (['停止', 'Stop', '中斷'].includes(text) || label.toLowerCase().includes('stop')) {
continue;
}
if (keywords.some(kw => text.includes(kw) || label.includes(kw))) {
btn.click();
return true;
}
}
return false;
}
`)
return nil
}
// extractResponse 提取回應
func (p *PlaywrightProvider) extractResponse() (string, error) {
var lastText string
var stableCount int
lastUpdate := time.Now()
timeout := 120 * time.Second
startTime := time.Now()
for time.Since(startTime) < timeout {
time.Sleep(500 * time.Millisecond)
// 使用 JavaScript 提取回應文字(更精確)
result, err := p.page.Evaluate(`
() => {
// 尋找所有可能的回應容器
const selectors = [
'model-response',
'.model-response',
'message-content',
'.message-content'
];
for (const sel of selectors) {
const el = document.querySelector(sel);
if (el) {
// 嘗試找markdown內容
const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]');
if (markdown && markdown.innerText.trim()) {
let text = markdown.innerText.trim();
// 移除常見的標籤前綴
text = text.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[:]\s*\n*/i, '').trim();
return { text: text, source: sel + ' .markdown' };
}
// 嘗試找純文字內容(排除標籤)
let textContent = el.innerText.trim();
if (textContent) {
// 移除常見的標籤前綴
textContent = textContent.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[:]\s*\n*/i, '').trim();
return { text: textContent, source: sel };
}
}
}
return { text: '', source: 'none' };
}
`)
if err == nil {
if m, ok := result.(map[string]interface{}); ok {
text, _ := m["text"].(string)
text = strings.TrimSpace(text)
if text != "" && len(text) > len(lastText) {
lastText = text
lastUpdate = time.Now()
stableCount = 0
fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text))
}
}
}
// 檢查是否完成(需要連續 3 次穩定)
if time.Since(lastUpdate) > 500*time.Millisecond && lastText != "" {
stableCount++
if stableCount >= 3 {
// 最終檢查:停止按鈕是否還存在
stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']")
count, _ := stopBtn.Count()
if count == 0 {
fmt.Println("[GeminiWeb] ✓ Response complete")
return lastText, nil
}
}
}
}
if lastText != "" {
fmt.Println("[GeminiWeb] ✓ Response complete (timeout)")
return lastText, nil
}
return "", fmt.Errorf("response timeout")
}
// selectModel 選擇 Gemini 模型
// Gemini Web 只有三種模型fast, thinking, pro
func (p *PlaywrightProvider) selectModel(model string) error {
// 映射模型名稱到 Gemini Web 的模型選擇器
modelMap := map[string]string{
"fast": "Fast",
"thinking": "Thinking",
"pro": "Pro",
"gemini-fast": "Fast",
"gemini-thinking": "Thinking",
"gemini-pro": "Pro",
"gemini-2.0-fast": "Fast",
"gemini-2.0-flash": "Fast", // 相容舊名稱
"gemini-2.5-pro": "Pro",
"gemini-2.5-pro-thinking": "Thinking",
}
// 從完整模型名稱中提取類型
modelType := ""
modelLower := strings.ToLower(model)
for key, value := range modelMap {
if strings.Contains(modelLower, strings.ToLower(key)) || modelLower == strings.ToLower(key) {
modelType = value
break
}
}
if modelType == "" {
// 默認使用 Fast
fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
return nil
}
fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)
// 點擊模型選擇器
modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
if count, _ := modelSelector.Count(); count > 0 {
if err := modelSelector.First().Click(); err != nil {
fmt.Printf("[GeminiWeb] Warning: could not click model selector: %v\n", err)
} else {
time.Sleep(500 * time.Millisecond)
// 選擇對應的模型選項
optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
if count, _ := optionSelector.Count(); count > 0 {
if err := optionSelector.First().Click(); err != nil {
fmt.Printf("[GeminiWeb] Warning: could not select model: %v\n", err)
} else {
fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
time.Sleep(500 * time.Millisecond)
}
}
}
}
return nil
}
// buildPromptFromMessages 從訊息列表建構提示詞
func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
var prompt string
for _, m := range messages {
switch m.Role {
case "system":
prompt += "System: " + m.Content + "\n\n"
case "user":
prompt += m.Content + "\n\n"
case "assistant":
prompt += "Assistant: " + m.Content + "\n\n"
}
}
return prompt
}