opencode-cursor-agent/internal/providers/geminiweb/page.go

251 lines
5.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package geminiweb
import (
"context"
"fmt"
"strings"
"time"
"github.com/go-rod/rod"
)
const geminiURL = "https://gemini.google.com/app"
// 輸入框選擇器(依優先順序)
var inputSelectors = []string{
".ProseMirror",
"rich-textarea",
"div[role='textbox'][contenteditable='true']",
"div[contenteditable='true']",
"textarea",
}
// NavigateToGemini 導航到 Gemini
func NavigateToGemini(page *rod.Page) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
if err := page.Context(ctx).Navigate(geminiURL); err != nil {
return fmt.Errorf("failed to navigate: %w", err)
}
return page.Context(ctx).WaitLoad()
}
// IsLoggedIn 檢查是否已登入
func IsLoggedIn(page *rod.Page) bool {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
for _, sel := range inputSelectors {
if _, err := page.Context(ctx).Element(sel); err == nil {
return true
}
}
return false
}
// SelectModel 選擇模型(可選)
func SelectModel(page *rod.Page, model string) error {
fmt.Printf("[GeminiWeb] Model selection skipped (using current model)\n")
return nil
}
// TypeInput 在輸入框中輸入文字
func TypeInput(page *rod.Page, text string) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
fmt.Println("[GeminiWeb] Looking for input field...")
// 1. 嘗試所有選擇器
var inputEl *rod.Element
var err error
for _, sel := range inputSelectors {
fmt.Printf(" Trying: %s\n", sel)
inputEl, err = page.Context(ctx).Element(sel)
if err == nil {
fmt.Printf(" ✓ Found with: %s\n", sel)
break
}
}
if err != nil {
// 2. Fallback: 嘗試等待頁面載入完成後重試
fmt.Println("[GeminiWeb] Waiting for page to fully load...")
time.Sleep(3 * time.Second)
for _, sel := range inputSelectors {
fmt.Printf(" Retrying: %s\n", sel)
inputEl, err = page.Context(ctx).Element(sel)
if err == nil {
fmt.Printf(" ✓ Found with: %s\n", sel)
break
}
}
}
if err != nil {
// 3. Debug: 印出頁面標題和 URL
info, _ := page.Info()
fmt.Printf("[GeminiWeb] DEBUG: URL=%s Title=%s\n", info.URL, info.Title)
// 4. Fallback: 嘗試更通用的選擇器
fmt.Println("[GeminiWeb] Trying generic selectors...")
genericSelectors := []string{
"div[contenteditable]",
"[contenteditable]",
"textarea",
"input[type='text']",
}
for _, sel := range genericSelectors {
fmt.Printf(" Trying generic: %s\n", sel)
inputEl, err = page.Context(ctx).Element(sel)
if err == nil {
fmt.Printf(" ✓ Found with: %s\n", sel)
break
}
}
}
if err != nil {
info, _ := page.Info()
return fmt.Errorf("input field not found after trying all selectors (URL=%s)", info.URL)
}
// 2. Focus 輸入框
fmt.Printf("[GeminiWeb] Focusing input field...\n")
if err := inputEl.Focus(); err != nil {
return fmt.Errorf("failed to focus input: %w", err)
}
time.Sleep(500 * time.Millisecond)
// 3. 使用 Input 方法
fmt.Printf("[GeminiWeb] Typing %d chars...\n", len(text))
if err := inputEl.Input(text); err != nil {
return fmt.Errorf("failed to input text: %w", err)
}
time.Sleep(200 * time.Millisecond)
fmt.Println("[GeminiWeb] Input complete")
return nil
}
// ClickSend 發送訊息
func ClickSend(page *rod.Page) error {
// 方法 1: 按 Enter
if err := page.Keyboard.Press('\r'); err != nil {
return fmt.Errorf("failed to press Enter: %w", err)
}
time.Sleep(200 * time.Millisecond)
return nil
}
// WaitForReady 等待頁面空閒
func WaitForReady(page *rod.Page) error {
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
fmt.Println("[GeminiWeb] Checking if page is ready...")
for {
select {
case <-ctx.Done():
fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway")
return nil
default:
time.Sleep(500 * time.Millisecond)
// 檢查是否有停止按鈕
hasStopBtn := false
stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
for _, btn := range stopBtns {
visible, _ := btn.Visible()
if visible {
hasStopBtn = true
break
}
}
if !hasStopBtn {
fmt.Println("[GeminiWeb] Page is ready")
return nil
}
}
}
}
// ExtractResponse 提取回應文字
func ExtractResponse(page *rod.Page) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
var lastText string
lastUpdate := time.Now()
for {
select {
case <-ctx.Done():
if lastText != "" {
return lastText, nil
}
return "", fmt.Errorf("response timeout")
default:
time.Sleep(500 * time.Millisecond)
// 尋找回應文字
for _, sel := range responseSelectors {
elements, err := page.Elements(sel)
if err != nil || len(elements) == 0 {
continue
}
// 取得最後一個元素的文字
lastEl := elements[len(elements)-1]
text, err := lastEl.Text()
if err != nil {
continue
}
text = strings.TrimSpace(text)
if text != "" && text != lastText && len(text) > len(lastText) {
lastText = text
lastUpdate = time.Now()
fmt.Printf("[GeminiWeb] Response length: %d\n", len(text))
}
}
// 檢查是否已完成2 秒內沒有新內容)
if time.Since(lastUpdate) > 2*time.Second && lastText != "" {
// 最後檢查一次是否還有停止按鈕
hasStopBtn := false
stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
for _, btn := range stopBtns {
visible, _ := btn.Visible()
if visible {
hasStopBtn = true
break
}
}
if !hasStopBtn {
return lastText, nil
}
}
}
}
}
// 默認的回應選擇器
var responseSelectors = []string{
".model-response-text",
".message-content",
".markdown",
".prose",
"model-response",
}