opencode-cursor-agent/internal/providers/geminiweb/page.go

207 lines
4.6 KiB
Go
Raw Normal View History

package geminiweb
import (
"context"
"fmt"
"strings"
"time"
"github.com/go-rod/rod"
)
// geminiURL is the address of the Gemini web app that NavigateToGemini opens.
const geminiURL = "https://gemini.google.com/app"
// inputSelectors lists CSS selectors for the prompt input box, in priority
// order. IsLoggedIn and TypeInput try them from first to last.
var inputSelectors = []string{
".ProseMirror",
"rich-textarea",
"div[role='textbox'][contenteditable='true']",
"div[contenteditable='true']",
"textarea",
}
// NavigateToGemini points page at geminiURL and then calls WaitLoad on it.
// Both steps share a single 30-second deadline. A navigation failure is
// wrapped with context; a WaitLoad failure is returned unchanged.
func NavigateToGemini(page *rod.Page) error {
	deadline, release := context.WithTimeout(context.Background(), 30*time.Second)
	defer release()

	bound := page.Context(deadline)
	navErr := bound.Navigate(geminiURL)
	if navErr != nil {
		return fmt.Errorf("failed to navigate: %w", navErr)
	}
	return bound.WaitLoad()
}
// IsLoggedIn reports whether any of the prompt input selectors matches an
// element on page; the presence of the input box is treated as the signal
// that the user is signed in.
//
// Every selector in inputSelectors is polled for up to 5 seconds in total.
// It returns true as soon as one matches and false once the deadline passes.
func IsLoggedIn(page *rod.Page) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	bound := page.Context(ctx)
	for {
		for _, sel := range inputSelectors {
			// Elements performs a single query. Element is avoided here
			// because it retries until its context expires, which would let
			// the first missing selector consume the whole time budget and
			// leave the fallback selectors untested.
			if found, err := bound.Elements(sel); err == nil && len(found) > 0 {
				return true
			}
		}

		select {
		case <-ctx.Done():
			return false
		case <-time.After(200 * time.Millisecond):
			// Poll again; the page may still be rendering.
		}
	}
}
// SelectModel is an optional step that is currently a no-op: it ignores both
// page and model, logs that model selection was skipped, and always returns
// nil, leaving whatever model the page already has active.
func SelectModel(page *rod.Page, model string) error {
	fmt.Println("[GeminiWeb] Model selection skipped (using current model)")
	return nil
}
// TypeInput locates the prompt input box on page, focuses it, and enters
// text into it.
//
// All selectors in inputSelectors are polled, in priority order, under a
// single 30-second deadline; the first element that matches is used. The same
// deadline also bounds the focus and input calls, because the element carries
// the page context it was found with.
//
// It returns an error when no input box appears before the deadline, or when
// focusing or typing fails.
func TypeInput(page *rod.Page, text string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// 1. Locate the input box.
	bound := page.Context(ctx)
	var inputEl *rod.Element
search:
	for {
		for _, sel := range inputSelectors {
			// Elements performs a single query. Element is avoided here
			// because it retries until its context expires, so a missing
			// first selector would use up the whole deadline and the
			// remaining selectors would never get a real attempt.
			matches, err := bound.Elements(sel)
			if err == nil && len(matches) > 0 {
				inputEl = matches[0]
				break search
			}
		}

		select {
		case <-ctx.Done():
			return fmt.Errorf("input field not found: %w", ctx.Err())
		case <-time.After(200 * time.Millisecond):
			// Poll again; the editor may not be rendered yet.
		}
	}

	// 2. Focus the input box.
	if err := inputEl.Focus(); err != nil {
		return fmt.Errorf("failed to focus input: %w", err)
	}
	time.Sleep(100 * time.Millisecond) // brief pause; presumably lets the editor settle

	// 3. Enter the text through rod's Input method.
	if err := inputEl.Input(text); err != nil {
		return fmt.Errorf("failed to input text: %w", err)
	}
	time.Sleep(100 * time.Millisecond)

	// 4. Select the text and press Enter to trigger the editor's events.
	// Both calls are best-effort, so their errors are deliberately ignored.
	// NOTE(review): pressing Enter here looks like it already submits the
	// message before ClickSend runs — confirm this is intended.
	_ = inputEl.SelectAllText()
	_ = page.Keyboard.Press('\r') // Enter key
	return nil
}
// ClickSend submits the message by pressing the Enter key on page's keyboard,
// then pauses 200 ms before returning. A key-press failure is wrapped and
// returned.
func ClickSend(page *rod.Page) error {
	const enterKey = '\r'

	pressErr := page.Keyboard.Press(enterKey)
	if pressErr != nil {
		return fmt.Errorf("failed to press Enter: %w", pressErr)
	}

	time.Sleep(200 * time.Millisecond)
	return nil
}
// WaitForReady waits until page shows no visible "Stop" button, polling every
// 500 ms for at most 15 seconds.
//
// The check is best-effort: query and visibility errors are ignored, and the
// function returns nil both when the page is found idle and when the deadline
// passes (in which case it logs that it is proceeding anyway).
func WaitForReady(page *rod.Page) error {
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	fmt.Println("[GeminiWeb] Checking if page is ready...")
	for ctx.Err() == nil {
		time.Sleep(500 * time.Millisecond)

		// Look for a stop button that is currently visible.
		buttons, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
		generating := false
		for i := 0; i < len(buttons) && !generating; i++ {
			generating, _ = buttons[i].Visible()
		}

		if !generating {
			fmt.Println("[GeminiWeb] Page is ready")
			return nil
		}
	}

	fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway")
	return nil
}
// ExtractResponse polls page every 500 ms, for at most 120 seconds, and
// returns the response text it has collected.
//
// On each poll it reads the trimmed text of the last element matching each
// entry of responseSelectors and keeps the longest text seen so far. The
// response is considered complete once that text has not grown for more than
// 2 seconds and no visible "Stop" button remains.
//
// If the deadline passes, whatever text was collected is returned with a nil
// error; when nothing was collected the error is "response timeout".
func ExtractResponse(page *rod.Page) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	defer cancel()

	// stopVisible reports whether any stop button is currently visible;
	// query and visibility errors count as "not visible".
	stopVisible := func() bool {
		buttons, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
		for _, b := range buttons {
			if shown, _ := b.Visible(); shown {
				return true
			}
		}
		return false
	}

	best := ""
	grewAt := time.Now()
	for ctx.Err() == nil {
		time.Sleep(500 * time.Millisecond)

		// Gather candidate response text from every selector.
		for _, sel := range responseSelectors {
			nodes, err := page.Elements(sel)
			if err != nil || len(nodes) == 0 {
				continue
			}

			// Only the last matching element is read.
			raw, err := nodes[len(nodes)-1].Text()
			if err != nil {
				continue
			}

			// A strictly longer candidate is necessarily non-empty and
			// different from the current best, so length alone decides.
			candidate := strings.TrimSpace(raw)
			if len(candidate) > len(best) {
				best = candidate
				grewAt = time.Now()
				fmt.Printf("[GeminiWeb] Response length: %d\n", len(candidate))
			}
		}

		// Not finished while nothing was captured or the text grew recently.
		if best == "" || time.Since(grewAt) <= 2*time.Second {
			continue
		}

		// Final check: a visible stop button means output is still coming.
		if !stopVisible() {
			return best, nil
		}
	}

	if best != "" {
		return best, nil
	}
	return "", fmt.Errorf("response timeout")
}
// responseSelectors are the default CSS selectors that ExtractResponse
// queries when looking for the response text.
var responseSelectors = []string{
".model-response-text",
".message-content",
".markdown",
".prose",
"model-response",
}