opencode-cursor-agent/internal/providers/geminiweb/page.go

207 lines
4.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package geminiweb
import (
"context"
"fmt"
"strings"
"time"
"github.com/go-rod/rod"
)
// geminiURL is the address of the Gemini web app that NavigateToGemini opens.
const geminiURL = "https://gemini.google.com/app"
// inputSelectors lists the CSS selectors used to find the chat input box,
// in priority order. IsLoggedIn and TypeInput walk this list from first to
// last.
var inputSelectors = []string{
".ProseMirror",
"rich-textarea",
"div[role='textbox'][contenteditable='true']",
"div[contenteditable='true']",
"textarea",
}
// NavigateToGemini points page at geminiURL and then waits for the page to
// load.
//
// Navigation and the load wait share a single 30-second deadline. A
// navigation failure is returned wrapped; a failure of the load wait is
// returned unchanged.
func NavigateToGemini(page *rod.Page) error {
	navCtx, release := context.WithTimeout(context.Background(), 30*time.Second)
	defer release()

	// Bind the deadline once and reuse the bound page for both steps.
	bounded := page.Context(navCtx)

	navErr := bounded.Navigate(geminiURL)
	if navErr != nil {
		return fmt.Errorf("failed to navigate: %w", navErr)
	}

	return bounded.WaitLoad()
}
// IsLoggedIn reports whether page looks logged in, using the presence of a
// chat input box (any entry of inputSelectors) as the signal.
//
// All selectors are polled together for up to 5 seconds and the first match
// ends the wait. Looking them up one after another under a shared deadline
// does not work: a blocking lookup of a missing selector uses up the whole
// budget, so the fallback selectors would only ever run against an expired
// context.
//
// Any lookup failure, including the timeout, is reported as false.
func IsLoggedIn(page *rod.Page) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Race checks every branch on each polling round, in the order added.
	race := page.Context(ctx).Race()
	for _, sel := range inputSelectors {
		race = race.Element(sel)
	}

	_, err := race.Do()
	return err == nil
}
// SelectModel is currently a placeholder: it does not touch page or use
// model. It prints a notice that model selection was skipped and always
// returns nil.
func SelectModel(page *rod.Page, model string) error {
	fmt.Print("[GeminiWeb] Model selection skipped (using current model)\n")
	return nil
}
// TypeInput finds the chat input box on page, focuses it, and types text
// into it.
//
// The input box is located by polling every entry of inputSelectors together
// for up to 30 seconds; on each polling round the selectors are tried in
// priority order and the first match wins. Looking them up one after another
// under a shared deadline does not work: a blocking lookup of a missing
// selector uses up the whole budget, so the fallback selectors would only
// ever run against an expired context.
//
// After typing, the text is selected and Enter is pressed on a best-effort
// basis; failures of those two steps are not reported.
//
// It returns an error when no input box is found in time, or when focusing
// or typing fails.
func TypeInput(page *rod.Page, text string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// 1. Locate the input box.
	race := page.Context(ctx).Race()
	for _, sel := range inputSelectors {
		race = race.Element(sel)
	}
	inputEl, err := race.Do()
	if err != nil {
		return fmt.Errorf("input field not found: %w", err)
	}

	// 2. Focus the input box.
	if err := inputEl.Focus(); err != nil {
		return fmt.Errorf("failed to focus input: %w", err)
	}
	time.Sleep(100 * time.Millisecond)

	// 3. Type the text through the element's Input method.
	if err := inputEl.Input(text); err != nil {
		return fmt.Errorf("failed to input text: %w", err)
	}
	time.Sleep(100 * time.Millisecond)

	// 4. Select the text and press Enter so the page's key handlers fire.
	// Both calls are best-effort, so their errors are deliberately dropped.
	// NOTE(review): this Enter presumably already submits the message, which
	// would make a following ClickSend press Enter a second time - confirm
	// against the callers before changing either function.
	_ = inputEl.SelectAllText()
	_ = page.Keyboard.Press('\r') // Enter key

	return nil
}
// ClickSend sends the message by pressing Enter on the page keyboard and
// then pauses for 200 ms. Despite its name it does not click a button.
//
// It returns a wrapped error when the key press fails.
func ClickSend(page *rod.Page) error {
	pressErr := page.Keyboard.Press('\r')
	if pressErr != nil {
		return fmt.Errorf("failed to press Enter: %w", pressErr)
	}

	// Short pause after the key press before returning to the caller.
	time.Sleep(200 * time.Millisecond)
	return nil
}
// WaitForReady polls page every 500 ms until it finds no visible button
// whose aria-label contains "Stop" or "停止", or until 15 seconds have
// passed.
//
// It always returns nil: on timeout it only prints a notice and lets the
// caller continue. Errors from the element query and from the visibility
// check are ignored, so a failed query counts as "no stop button".
func WaitForReady(page *rod.Page) error {
	waitCtx, release := context.WithTimeout(context.Background(), 15*time.Second)
	defer release()

	fmt.Println("[GeminiWeb] Checking if page is ready...")

	for waitCtx.Err() == nil {
		time.Sleep(500 * time.Millisecond)

		// Look for a visible stop button.
		busy := false
		buttons, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
		for _, b := range buttons {
			if shown, _ := b.Visible(); shown {
				busy = true
				break
			}
		}
		if busy {
			continue
		}

		fmt.Println("[GeminiWeb] Page is ready")
		return nil
	}

	fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway")
	return nil
}
// ExtractResponse polls page every 500 ms and returns the response text
// once it has stopped growing.
//
// On each round it walks responseSelectors, reads the trimmed text of the
// last element matching each selector, and keeps the longest text seen so
// far. The response is treated as complete when that text has not grown for
// more than 2 seconds and no visible button with an aria-label containing
// "Stop" or "停止" is found.
//
// The overall budget is 120 seconds. When it runs out, whatever text was
// collected is returned with a nil error; if nothing was collected the error
// "response timeout" is returned. Query errors are skipped, not reported.
//
// NOTE(review): only the last matching element is read, so this presumably
// relies on the new reply already being the last match; text from an earlier
// turn could be picked up otherwise - confirm against the callers.
func ExtractResponse(page *rod.Page) (string, error) {
	waitCtx, release := context.WithTimeout(context.Background(), 120*time.Second)
	defer release()

	best := ""
	grewAt := time.Now()

	for waitCtx.Err() == nil {
		time.Sleep(500 * time.Millisecond)

		// Collect the longest response text across all selectors.
		for _, sel := range responseSelectors {
			matches, err := page.Elements(sel)
			if err != nil || len(matches) == 0 {
				continue
			}

			// Only the last matching element is read.
			candidate, err := matches[len(matches)-1].Text()
			if err != nil {
				continue
			}
			candidate = strings.TrimSpace(candidate)

			// A strictly longer text is necessarily non-empty and different
			// from the current one, so the length test alone is sufficient.
			if len(candidate) <= len(best) {
				continue
			}
			best = candidate
			grewAt = time.Now()
			fmt.Printf("[GeminiWeb] Response length: %d\n", len(candidate))
		}

		// Not finished while there is no text yet or it grew in the last 2 s.
		if best == "" || time.Since(grewAt) <= 2*time.Second {
			continue
		}

		// Final check: a visible stop button keeps the loop polling.
		generating := false
		buttons, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
		for _, b := range buttons {
			if shown, _ := b.Visible(); shown {
				generating = true
				break
			}
		}
		if !generating {
			return best, nil
		}
	}

	// Deadline reached: hand back whatever was collected, if anything.
	if best != "" {
		return best, nil
	}
	return "", fmt.Errorf("response timeout")
}
// responseSelectors lists the default CSS selectors for response content.
// ExtractResponse tries every entry on each polling round and reads the last
// element matching each one.
var responseSelectors = []string{
".model-response-text",
".message-content",
".markdown",
".prose",
"model-response",
}