opencode-cursor-agent/internal/providers/geminiweb/playwright_provider.go

430 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package geminiweb
import (
"context"
"cursor-api-proxy/internal/apitypes"
"cursor-api-proxy/internal/config"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/playwright-community/playwright-go"
)
// PlaywrightProvider is a Gemini provider that drives the Gemini web UI
// through a Playwright-controlled browser.
type PlaywrightProvider struct {
// cfg is the bridge configuration the provider was created with.
cfg config.BridgeConfig
// pw is the Playwright driver handle used to launch Chromium.
pw *playwright.Playwright
// browser is not assigned by the code visible in this part of the file.
browser playwright.Browser
// context is the persistent browser context; nil until the browser is launched.
context playwright.BrowserContext
// page is the page used for all Gemini interaction; nil until the browser is launched.
page playwright.Page
// mu is held while launching and while closing the browser.
mu sync.Mutex
// userDataDir is the profile directory passed to the persistent context.
userDataDir string
}
// Process-wide Playwright driver state, initialised at most once by
// NewPlaywrightProvider.
var (
// playwrightInstance is the shared driver handle returned by playwright.Run.
playwrightInstance *playwright.Playwright
// playwrightOnce guards the single call to playwright.Run.
playwrightOnce sync.Once
// playwrightErr holds the wrapped error of that call, or nil on success.
playwrightErr error
)
// NewPlaywrightProvider creates a provider bound to the shared Playwright
// driver.
//
// The driver is started on the first call only; if that start failed, the same
// wrapped error is returned by every later call. The provider stores its
// browser profile in "<cfg.GeminiAccountDir>/default-session", which is created
// if missing. The browser itself is not launched here.
func NewPlaywrightProvider(cfg config.BridgeConfig) (*PlaywrightProvider, error) {
	// Start the Playwright driver once per process (singleton).
	playwrightOnce.Do(func() {
		pw, err := playwright.Run()
		if err != nil {
			err = fmt.Errorf("failed to run playwright: %w", err)
		}
		playwrightInstance, playwrightErr = pw, err
	})
	if playwrightErr != nil {
		return nil, playwrightErr
	}

	sessionDir := filepath.Join(cfg.GeminiAccountDir, "default-session")

	// Clear Chrome lock files from the profile (cleanLockFiles is defined
	// elsewhere), then make sure the directory exists.
	cleanLockFiles(sessionDir)
	if err := os.MkdirAll(sessionDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create user data dir: %w", err)
	}

	provider := &PlaywrightProvider{
		cfg:         cfg,
		pw:          playwrightInstance,
		userDataDir: sessionDir,
	}
	return provider, nil
}
// Name returns the provider name, "gemini-web".
func (p *PlaywrightProvider) Name() string {
return "gemini-web"
}
// launchIfNeeded starts Chromium with a persistent context unless a context
// and page are already available.
//
// The persistent context stores the session in p.userDataDir. The browser is
// headless unless cfg.GeminiBrowserVisible is set. The first existing page of
// the context is reused; a new page is created only when there is none.
//
// p.context and p.page are assigned together and only after both were obtained,
// so a failed launch never leaves a closed context stored on the provider.
func (p *PlaywrightProvider) launchIfNeeded() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	if p.context != nil && p.page != nil {
		return nil
	}

	fmt.Println("[GeminiWeb] Launching Chromium...")

	// LaunchPersistentContext keeps the session in the user data directory.
	browserCtx, err := p.pw.Chromium.LaunchPersistentContext(p.userDataDir,
		playwright.BrowserTypeLaunchPersistentContextOptions{
			Headless: playwright.Bool(!p.cfg.GeminiBrowserVisible),
			Args: []string{
				"--no-first-run",
				"--no-default-browser-check",
				"--disable-background-networking",
				"--disable-extensions",
				"--disable-plugins",
				"--disable-sync",
			},
		})
	if err != nil {
		return fmt.Errorf("failed to launch persistent context: %w", err)
	}

	// Reuse the first page of the context, or open one.
	var page playwright.Page
	if pages := browserCtx.Pages(); len(pages) > 0 {
		page = pages[0]
	} else {
		page, err = browserCtx.NewPage()
		if err != nil {
			// Best-effort cleanup: the NewPage error is the one worth reporting.
			_ = browserCtx.Close()
			return fmt.Errorf("failed to create page: %w", err)
		}
	}

	p.context, p.page = browserCtx, page
	fmt.Println("[GeminiWeb] Browser launched")
	return nil
}
// Generate sends the conversation to the Gemini web UI and reports the reply
// through cb.
//
// It launches the browser if needed, navigates to Gemini when the current page
// is on another site, waits for the input box, fills in the prompt built from
// messages, submits it and polls the page for the answer. On success cb
// receives one text chunk holding the whole response followed by a done chunk.
// model is only logged and tools is not used.
//
// ctx is checked between steps and its error is returned once it is done; a
// browser operation that has already started is not interrupted.
//
// A page-readiness failure and a missing login are only logged; every other
// step failure is returned wrapped.
func (p *PlaywrightProvider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) error {
	fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)

	if err := ctx.Err(); err != nil {
		return err
	}

	// 1. Make sure the browser is running.
	if err := p.launchIfNeeded(); err != nil {
		return fmt.Errorf("failed to launch browser: %w", err)
	}

	// 2. Navigate to Gemini if the page is somewhere else.
	currentURL := p.page.URL()
	if !strings.Contains(currentURL, "gemini.google.com") {
		fmt.Println("[GeminiWeb] Navigating to Gemini...")
		if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{
			WaitUntil: playwright.WaitUntilStateNetworkidle,
			Timeout:   playwright.Float(30000),
		}); err != nil {
			return fmt.Errorf("failed to navigate: %w", err)
		}
	}

	// 3. Wait for the page to be fully loaded; a failure here is only a
	// warning because typeInput searches for the input box again.
	fmt.Println("[GeminiWeb] Waiting for page to be ready...")
	if err := p.waitForPageReady(); err != nil {
		fmt.Printf("[GeminiWeb] Warning: %v\n", err)
	}

	// 4. Check the login state (informational only).
	fmt.Println("[GeminiWeb] Checking login status...")
	if p.isLoggedIn() {
		fmt.Println("[GeminiWeb] Logged in")
	} else {
		fmt.Println("[GeminiWeb] Not logged in, continuing anyway")
		if p.cfg.GeminiBrowserVisible {
			fmt.Println("\n========================================")
			fmt.Println("Browser is open. You can:")
			fmt.Println("1. Log in to Gemini now")
			fmt.Println("2. Continue without login")
			// The blank line is printed separately: a Println argument ending
			// in "\n" is rejected by the go vet printf check.
			fmt.Println("========================================")
			fmt.Println()
		}
	}

	if err := ctx.Err(); err != nil {
		return err
	}

	// 5. Build the prompt.
	prompt := buildPromptFromMessagesPlaywright(messages)
	fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))

	// 6. Fill in the text.
	if err := p.typeInput(prompt); err != nil {
		return fmt.Errorf("failed to type: %w", err)
	}

	// 7. Submit the message.
	fmt.Println("[GeminiWeb] Sending message...")
	if err := p.sendMessage(); err != nil {
		return fmt.Errorf("failed to send: %w", err)
	}

	if err := ctx.Err(); err != nil {
		return err
	}

	// 8. Extract the response.
	fmt.Println("[GeminiWeb] Waiting for response...")
	response, err := p.extractResponse()
	if err != nil {
		return fmt.Errorf("failed to extract response: %w", err)
	}

	// 9. Deliver the result.
	cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: response})
	cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})
	fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
	return nil
}
// Close shuts down the browser context, if one is open.
//
// On success the stored context and page are cleared. If closing fails, the
// error is returned and both fields are left as they were.
func (p *PlaywrightProvider) Close() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Nothing was launched, or it has already been closed.
	if p.context == nil {
		return nil
	}

	err := p.context.Close()
	if err != nil {
		return err
	}
	p.context, p.page = nil, nil
	return nil
}
// waitForPageReady waits until the Gemini page shows an input field.
//
// It first gives a "Stop" button up to 5 seconds to detach, ignoring the
// outcome. It then tries each known input selector in turn with a 10 second
// visibility timeout. If none appears it sleeps 3 seconds and tries every
// selector again with a 5 second timeout. The returned error wraps the last
// failure of the first pass.
func (p *PlaywrightProvider) waitForPageReady() error {
	fmt.Println("[GeminiWeb] Checking for ready state...")

	// 1. Wait for the stop button (if any) to go away; best effort only.
	_, _ = p.page.WaitForSelector("button[aria-label*='Stop'], button[aria-label*='停止']", playwright.PageWaitForSelectorOptions{
		State:   playwright.WaitForSelectorStateDetached,
		Timeout: playwright.Float(5000),
	})

	// 2. Wait for the input box to show up — this is the key signal.
	candidates := []string{
		".ProseMirror",
		"rich-textarea",
		"div[role='textbox']",
		"div[contenteditable='true']",
		"textarea",
	}
	// waitVisible blocks until css is visible or timeoutMs milliseconds pass.
	waitVisible := func(css string, timeoutMs float64) error {
		return p.page.Locator(css).WaitFor(playwright.LocatorWaitForOptions{
			State:   playwright.WaitForSelectorStateVisible,
			Timeout: playwright.Float(timeoutMs),
		})
	}

	var firstPassErr error
	for _, css := range candidates {
		fmt.Printf(" Checking for: %s\n", css)
		err := waitVisible(css, 10000)
		if err == nil {
			fmt.Printf(" ✓ Input field found: %s\n", css)
			return nil
		}
		firstPassErr = err
	}

	// 3. Nothing found yet: give the page more time and look once more.
	fmt.Println("[GeminiWeb] Input not found immediately, waiting longer...")
	time.Sleep(3 * time.Second)
	for _, css := range candidates {
		if waitVisible(css, 5000) != nil {
			continue
		}
		fmt.Printf(" ✓ Input field found after wait: %s\n", css)
		return nil
	}

	return fmt.Errorf("input field not ready: %w", firstPassErr)
}
// isLoggedIn reports whether the page currently contains an input field.
//
// The presence of any element matching one of the known input selectors is
// treated as the sign of a logged-in session. Errors from counting matches are
// ignored and count as "no match".
func (p *PlaywrightProvider) isLoggedIn() bool {
	inputSelectors := [...]string{
		".ProseMirror",
		"rich-textarea",
		"div[role='textbox']",
		"div[contenteditable='true']",
		"textarea",
	}
	for _, css := range inputSelectors {
		matches, _ := p.page.Locator(css).Count()
		if matches > 0 {
			return true
		}
	}
	return false
}
// typeInput fills text into the Gemini input field.
//
// Each known input selector is given 3 seconds to become available; the first
// one that does is filled via Locator.Fill. If none is found, the error reports
// the current page URL and title to help debugging.
func (p *PlaywrightProvider) typeInput(text string) error {
	fmt.Println("[GeminiWeb] Looking for input field...")

	// locate returns the first candidate that becomes available in time.
	locate := func() (playwright.Locator, bool) {
		candidates := []string{
			".ProseMirror",
			"rich-textarea",
			"div[role='textbox'][contenteditable='true']",
			"div[contenteditable='true']",
			"textarea",
		}
		for _, css := range candidates {
			fmt.Printf(" Trying: %s\n", css)
			candidate := p.page.Locator(css)
			err := candidate.WaitFor(playwright.LocatorWaitForOptions{
				Timeout: playwright.Float(3000),
			})
			if err != nil {
				continue
			}
			fmt.Printf(" ✓ Found with: %s\n", css)
			return candidate, true
		}
		return nil, false
	}

	field, ok := locate()
	if !ok {
		// Include debugging information about where the browser ended up.
		pageURL := p.page.URL()
		pageTitle, _ := p.page.Title()
		return fmt.Errorf("input field not found (URL=%s, Title=%s)", pageURL, pageTitle)
	}

	// Fill the field with the whole text.
	fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text))
	if err := field.Fill(text); err != nil {
		return fmt.Errorf("failed to fill: %w", err)
	}

	fmt.Println("[GeminiWeb] Input complete")
	return nil
}
// sendMessage submits the text currently in the input field.
//
// It presses Enter, pauses 200ms, and then runs a script in the page that
// clicks the first button whose text or aria-label contains a "send" keyword,
// skipping stop buttons. Only a failure to press Enter is reported; the result
// of the click script is ignored.
func (p *PlaywrightProvider) sendMessage() error {
	// clickSendButton is evaluated in the page; it returns true when a send
	// button was clicked.
	const clickSendButton = `
() => {
const keywords = ['發送', 'Send', '傳送'];
const buttons = Array.from(document.querySelectorAll('button, [role="button"]'));
for (const btn of buttons) {
const text = (btn.innerText || btn.textContent || '').trim();
const label = (btn.getAttribute('aria-label') || '').trim();
// 跳過停止按鈕
if (['停止', 'Stop', '中斷'].includes(text) || label.toLowerCase().includes('stop')) {
continue;
}
if (keywords.some(kw => text.includes(kw) || label.includes(kw))) {
btn.click();
return true;
}
}
return false;
}
`
	const settleDelay = 200 * time.Millisecond

	// Method 1: press Enter.
	err := p.page.Keyboard().Press("Enter")
	if err != nil {
		return fmt.Errorf("failed to press Enter: %w", err)
	}
	time.Sleep(settleDelay)

	// Method 2: also try clicking the send button as a reinforcement; its
	// outcome is deliberately ignored.
	_, _ = p.page.Evaluate(clickSendButton)
	return nil
}
// extractResponse polls the page until the model's answer stops growing and
// returns its text.
//
// Every 500ms, for each known response selector, the trimmed text content of
// the last matching element is read; the longest text seen so far is kept. The
// response is considered complete once that text has not grown for more than
// 2 seconds and no stop button is present. After 120 seconds the longest text
// seen is returned anyway, or a "response timeout" error if nothing was read.
func (p *PlaywrightProvider) extractResponse() (string, error) {
	const (
		pollInterval = 500 * time.Millisecond
		quietPeriod  = 2 * time.Second
		maxWait      = 120 * time.Second
		stopButton   = "button[aria-label*='Stop'], button[aria-label*='停止']"
	)
	responseSelectors := [...]string{
		".model-response-text",
		".message-content",
		".markdown",
		".prose",
		"model-response",
	}

	var longest string
	lastGrowth := time.Now()

	for began := time.Now(); time.Since(began) < maxWait; {
		time.Sleep(pollInterval)

		// Try every selector and keep whichever yields the longest text.
		for _, css := range responseSelectors {
			matches := p.page.Locator(css)
			if n, _ := matches.Count(); n <= 0 {
				continue
			}
			// Only the last matching element is read.
			raw, err := matches.Last().TextContent()
			if err != nil {
				continue
			}
			trimmed := strings.TrimSpace(raw)
			if len(trimmed) <= len(longest) {
				continue
			}
			longest = trimmed
			lastGrowth = time.Now()
			fmt.Printf("[GeminiWeb] Response length: %d\n", len(trimmed))
		}

		// Not complete while nothing was read or the text grew recently.
		if longest == "" || time.Since(lastGrowth) <= quietPeriod {
			continue
		}
		// Final check: the stop button must be gone as well.
		if n, _ := p.page.Locator(stopButton).Count(); n == 0 {
			return longest, nil
		}
	}

	if longest == "" {
		return "", fmt.Errorf("response timeout")
	}
	return longest, nil
}
// buildPromptFromMessagesPlaywright flattens a message list into one prompt
// string.
//
// System messages are prefixed with "System: ", assistant messages with
// "Assistant: ", and user messages are written without a prefix. Each message
// is followed by a blank line. Messages with any other role are skipped. An
// empty list yields an empty string.
func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
	// A Builder avoids re-copying the whole prompt on every append.
	var b strings.Builder
	for i := range messages {
		m := &messages[i]
		switch m.Role {
		case "system":
			b.WriteString("System: ")
		case "user":
			// User content is written without a prefix.
		case "assistant":
			b.WriteString("Assistant: ")
		default:
			continue
		}
		b.WriteString(m.Content)
		b.WriteString("\n\n")
	}
	return b.String()
}