opencode-cursor-agent/internal/providers/geminiweb/page.go

251 lines
5.8 KiB
Go
Raw Normal View History

package geminiweb
import (
"context"
"fmt"
"strings"
"time"
"github.com/go-rod/rod"
)
// geminiURL is the Gemini web app address that NavigateToGemini opens.
const geminiURL = "https://gemini.google.com/app"
// inputSelectors lists CSS selectors for the prompt input box, in priority
// order. IsLoggedIn and TypeInput probe them from first to last.
var inputSelectors = []string{
".ProseMirror",
"rich-textarea",
"div[role='textbox'][contenteditable='true']",
"div[contenteditable='true']",
"textarea",
}
// NavigateToGemini points the page at the Gemini web app and then waits for
// the page load to finish. Navigation and the load wait share a single
// 30-second deadline.
//
// A navigation failure is wrapped with context; the result of the load wait
// is returned as-is.
func NavigateToGemini(page *rod.Page) error {
	deadline, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	bound := page.Context(deadline)
	navErr := bound.Navigate(geminiURL)
	if navErr != nil {
		return fmt.Errorf("failed to navigate: %w", navErr)
	}
	return bound.WaitLoad()
}
// IsLoggedIn reports whether the page appears to be logged in, using the
// presence of any known prompt input element as the signal.
//
// It polls every selector in inputSelectors for up to 5 seconds and returns
// true as soon as one matches. Each probe uses Elements, which returns
// immediately, instead of Element, which retries until the context expires.
// With Element, a missing first selector would consume the whole budget and
// every later selector would fail instantly on the expired context.
func IsLoggedIn(page *rod.Page) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	bound := page.Context(ctx)

	tick := time.NewTicker(250 * time.Millisecond)
	defer tick.Stop()

	for {
		for _, sel := range inputSelectors {
			// Probe errors (e.g. the page is mid-navigation) are treated as
			// "not found yet"; the next tick retries.
			if matches, err := bound.Elements(sel); err == nil && len(matches) > 0 {
				return true
			}
		}

		select {
		case <-ctx.Done():
			return false
		case <-tick.C:
		}
	}
}
// SelectModel is the (optional) model-selection step. It currently performs
// no selection: it logs that the step was skipped and always returns nil.
// Both page and model are unused.
func SelectModel(page *rod.Page, model string) error {
	fmt.Println("[GeminiWeb] Model selection skipped (using current model)")
	return nil
}
// TypeInput locates the Gemini prompt box and types text into it.
//
// The whole operation shares one 30-second deadline. The input element is
// searched for in two phases:
//
//  1. inputSelectors, polled in priority order for up to 15 seconds;
//  2. a set of more generic selectors, polled for up to 5 more seconds.
//
// Once found, the element is focused and the text is entered with
// Element.Input. An error is returned if no selector matches or if focusing
// or typing fails.
func TypeInput(page *rod.Page, text string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	// The element returned below inherits this context, so it must stay
	// alive until Focus and Input have completed.
	bound := page.Context(ctx)

	fmt.Println("[GeminiWeb] Looking for input field...")

	inputEl, matched, probeErr := pollForInput(ctx, bound, inputSelectors, 15*time.Second)
	if inputEl == nil {
		// page.Info may fail and return a nil info, so read it defensively
		// instead of dereferencing the result unconditionally.
		pageURL, pageTitle := "unknown", "unknown"
		if info, infoErr := page.Info(); infoErr == nil && info != nil {
			pageURL, pageTitle = info.URL, info.Title
		}
		fmt.Printf("[GeminiWeb] DEBUG: URL=%s Title=%s\n", pageURL, pageTitle)

		fmt.Println("[GeminiWeb] Trying generic selectors...")
		genericSelectors := []string{
			"div[contenteditable]",
			"[contenteditable]",
			"textarea",
			"input[type='text']",
		}
		inputEl, matched, probeErr = pollForInput(ctx, bound, genericSelectors, 5*time.Second)
		if inputEl == nil {
			if probeErr != nil {
				return fmt.Errorf("input field not found after trying all selectors (URL=%s): %w", pageURL, probeErr)
			}
			return fmt.Errorf("input field not found after trying all selectors (URL=%s)", pageURL)
		}
	}
	fmt.Printf(" ✓ Found with: %s\n", matched)

	fmt.Printf("[GeminiWeb] Focusing input field...\n")
	if err := inputEl.Focus(); err != nil {
		return fmt.Errorf("failed to focus input: %w", err)
	}
	// Short pause after focusing before typing starts.
	time.Sleep(500 * time.Millisecond)

	// Count runes, not bytes, so the logged "chars" figure is right for
	// non-ASCII prompts.
	fmt.Printf("[GeminiWeb] Typing %d chars...\n", len([]rune(text)))
	if err := inputEl.Input(text); err != nil {
		return fmt.Errorf("failed to input text: %w", err)
	}
	time.Sleep(200 * time.Millisecond)

	fmt.Println("[GeminiWeb] Input complete")
	return nil
}

// pollForInput probes selectors in priority order every 250ms until one
// matches, wait elapses, or ctx is done. It returns the first matching
// element and its selector, or a nil element plus the last probe error (if
// any) when nothing matched.
//
// Each probe uses Elements, which returns immediately, so one missing
// selector cannot use up the time budget of the selectors after it.
func pollForInput(ctx context.Context, page *rod.Page, selectors []string, wait time.Duration) (*rod.Element, string, error) {
	limit := time.NewTimer(wait)
	defer limit.Stop()
	tick := time.NewTicker(250 * time.Millisecond)
	defer tick.Stop()

	var lastErr error
	for {
		for _, sel := range selectors {
			matches, err := page.Elements(sel)
			if err != nil {
				// Remember the failure for diagnostics and keep probing;
				// the page may still be loading.
				lastErr = err
				continue
			}
			if len(matches) > 0 {
				return matches[0], sel, nil
			}
		}

		select {
		case <-ctx.Done():
			return nil, "", lastErr
		case <-limit.C:
			return nil, "", lastErr
		case <-tick.C:
		}
	}
}
// ClickSend submits the current prompt by pressing Enter on the page
// keyboard, then pauses for 200ms. It returns a wrapped error if the key
// press fails.
func ClickSend(page *rod.Page) error {
	// Method 1: press Enter.
	pressErr := page.Keyboard.Press('\r')
	if pressErr != nil {
		return fmt.Errorf("failed to press Enter: %w", pressErr)
	}

	time.Sleep(200 * time.Millisecond)
	return nil
}
// WaitForReady waits until the page looks idle, i.e. no "Stop" button is
// visible. It checks every 500ms for up to 15 seconds.
//
// The function is best-effort: it always returns nil, including when the
// deadline passes while a stop button is still showing.
func WaitForReady(page *rod.Page) error {
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// stopVisible reports whether any stop button is currently visible.
	// Lookup and visibility errors are ignored and count as "not visible".
	stopVisible := func() bool {
		buttons, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
		for _, b := range buttons {
			if shown, _ := b.Visible(); shown {
				return true
			}
		}
		return false
	}

	fmt.Println("[GeminiWeb] Checking if page is ready...")
	for ctx.Err() == nil {
		time.Sleep(500 * time.Millisecond)
		if !stopVisible() {
			fmt.Println("[GeminiWeb] Page is ready")
			return nil
		}
	}

	fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway")
	return nil
}
// ExtractResponse polls the page for the model's reply and returns its text.
//
// Every 500ms it reads the last element matched by each selector in
// responseSelectors and keeps the longest trimmed text seen so far. The reply
// is considered complete once that text has not grown for more than 2 seconds
// and no stop button is visible. If the 120-second deadline passes first, the
// text collected so far is returned; an error is returned only when nothing
// was collected at all.
func ExtractResponse(page *rod.Page) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()

	// stopVisible reports whether any stop button is currently visible.
	// Lookup and visibility errors are ignored and count as "not visible".
	stopVisible := func() bool {
		buttons, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
		for _, b := range buttons {
			if shown, _ := b.Visible(); shown {
				return true
			}
		}
		return false
	}

	var (
		best      string
		changedAt = time.Now()
	)

	for ctx.Err() == nil {
		time.Sleep(500 * time.Millisecond)

		// Look for response text under each known selector.
		for _, sel := range responseSelectors {
			nodes, err := page.Elements(sel)
			if err != nil || len(nodes) == 0 {
				continue
			}
			// Only the last matching element is read.
			raw, err := nodes[len(nodes)-1].Text()
			if err != nil {
				continue
			}
			candidate := strings.TrimSpace(raw)
			// Accept only strictly longer text; this also rules out empty
			// and unchanged text.
			if len(candidate) <= len(best) {
				continue
			}
			best = candidate
			changedAt = time.Now()
			fmt.Printf("[GeminiWeb] Response length: %d\n", len(candidate))
		}

		// Done when there has been no new content for 2 seconds and, on a
		// final check, no stop button is visible.
		quiet := time.Since(changedAt) > 2*time.Second
		if quiet && best != "" && !stopVisible() {
			return best, nil
		}
	}

	if best == "" {
		return "", fmt.Errorf("response timeout")
	}
	return best, nil
}
// responseSelectors lists the default CSS selectors that ExtractResponse
// queries when looking for the model's reply text.
var responseSelectors = []string{
".model-response-text",
".message-content",
".markdown",
".prose",
"model-response",
}