opencode-cursor-agent/internal/providers/geminiweb/page.go

315 lines
7.9 KiB
Go

package geminiweb
import (
"context"
"fmt"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
)
// geminiURL is the address of the Gemini web app that NavigateToGemini opens.
const geminiURL = "https://gemini.google.com/app"
// modelSelectors maps a model identifier to the short label that
// GetModelDisplayName returns for it. SelectModel uses that label to recognise
// the model in the page's model picker.
var modelSelectors = map[string]string{
"gemini-2.0-flash": "Flash",
"gemini-2.5-pro": "Pro",
"gemini-2.5-pro-thinking": "Thinking",
}
// NormalizeModel returns model with a leading "gemini-" guaranteed. A name
// that already starts with the prefix comes back unchanged; any other name
// (including the empty string) gets the prefix prepended.
func NormalizeModel(model string) string {
	const prefix = "gemini-"
	// Dropping an existing prefix and then re-adding it unconditionally gives
	// the same result as branching on whether the prefix is present.
	return prefix + strings.TrimPrefix(model, prefix)
}
// GetModelDisplayName returns the picker label registered for model in
// modelSelectors. Models that are not registered fall back to "Flash".
func GetModelDisplayName(model string) string {
	label, known := modelSelectors[model]
	if !known {
		return "Flash"
	}
	return label
}
// NavigateToGemini points page at geminiURL and then calls WaitLoad on it.
// Both steps run under one shared 30-second deadline. A navigation failure is
// returned wrapped; a WaitLoad failure is returned as-is.
func NavigateToGemini(page *rod.Page) error {
	deadline, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	bounded := page.Context(deadline)
	err := bounded.Navigate(geminiURL)
	if err != nil {
		return fmt.Errorf("failed to navigate to gemini: %w", err)
	}
	return bounded.WaitLoad()
}
// IsLoggedIn reports whether page shows any of the elements this package
// treats as evidence of a signed-in chat UI: a prompt input, a chat-labelled
// element, or a "new ..." button. It waits up to five seconds for one of them
// to appear and returns false if none does (or if the lookup fails).
func IsLoggedIn(page *rod.Page) bool {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Possible indicators of the logged-in state.
	selectors := []string{
		`textarea`,
		`[contenteditable="true"]`,
		`[aria-label*="chat" i]`,
		`button[aria-label*="new" i]`,
	}

	// Element keeps retrying until its context is done. Probing the selectors
	// one after another under a single deadline therefore lets the first miss
	// consume the whole budget, and every later selector runs against an
	// already-expired context. Joining them into one CSS selector list makes
	// the wait succeed as soon as any indicator is present.
	_, err := page.Context(ctx).Element(strings.Join(selectors, ", "))
	return err == nil
}
// waitForFirstElement polls page until one of selectors matches or timeout
// elapses. On every pass the selectors are tried in order, so an earlier
// selector wins whenever several match at once. It returns the matched element
// together with the selector that found it.
//
// The lookup uses page.Elements, which queries once and returns, instead of
// page.Element, which retries until the page context is done. That way one
// missing selector cannot stall the lookup or starve the selectors after it.
// The returned element stays bound to whatever context page carries.
func waitForFirstElement(page *rod.Page, selectors []string, timeout time.Duration) (*rod.Element, string, error) {
	const pollInterval = 200 * time.Millisecond

	deadline := time.Now().Add(timeout)
	for {
		var lastErr error
		for _, sel := range selectors {
			found, err := page.Elements(sel)
			if err != nil {
				// Remember the failure, but still give the remaining
				// selectors a chance on this pass.
				lastErr = err
				continue
			}
			if len(found) > 0 {
				return found[0], sel, nil
			}
		}
		if !time.Now().Before(deadline) {
			if lastErr != nil {
				return nil, "", fmt.Errorf("no element matched within %s: %w", timeout, lastErr)
			}
			return nil, "", fmt.Errorf("no element matched within %s", timeout)
		}
		time.Sleep(pollInterval)
	}
}

// modelOptionByText returns the first `[role="option"]` element on page whose
// text contains displayName (case-insensitively), or nil if there is none.
func modelOptionByText(page *rod.Page, displayName string) *rod.Element {
	options, err := page.Elements(`[role="option"]`)
	if err != nil {
		return nil
	}
	want := strings.ToLower(displayName)
	for _, opt := range options {
		text, err := opt.Text()
		if err == nil && strings.Contains(strings.ToLower(text), want) {
			return opt
		}
	}
	return nil
}

// SelectModel tries to switch the page's model picker to the given model.
//
// The operation is best-effort. If the picker or the requested option cannot
// be found, a warning is printed and nil is returned so the caller carries on
// with whatever model is currently active. An error is returned only when a
// click on an element that was found fails. All page interaction shares one
// 10-second deadline.
func SelectModel(page *rod.Page, model string) error {
	displayName := GetModelDisplayName(model)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	bounded := page.Context(ctx)

	// Possible selectors for the model picker, most specific first.
	switcherSelectors := []string{
		`button[aria-label*="model" i]`,
		`[data-test-id="model-selector"]`,
		`button[aria-haspopup="listbox"]`,
		`[class*="model-selector"]`,
		`[class*="model"] button`,
	}
	modelSwitcher, _, err := waitForFirstElement(bounded, switcherSelectors, 4*time.Second)
	if err != nil {
		// No picker found: the page may already be on the right model, or the
		// Gemini UI may differ from what these selectors expect.
		fmt.Printf("Warning: model selector not found, using current model (requested: %s)\n", displayName)
		return nil
	}

	// A failed read only means the "already selected" shortcut is skipped, so
	// the error is deliberately ignored.
	currentText, _ := modelSwitcher.Text()
	if currentText != "" && strings.Contains(strings.ToLower(currentText), strings.ToLower(displayName)) {
		// Already on the requested model.
		return nil
	}

	if err := modelSwitcher.Click(proto.InputMouseButtonLeft, 1); err != nil {
		return fmt.Errorf("failed to click model selector: %w", err)
	}
	// Give the picker a moment to open before looking for its options.
	time.Sleep(500 * time.Millisecond)

	// Possible selectors for the option itself.
	// NOTE(review): these are substring matches over the whole page, so a
	// short name such as "Pro" can also match unrelated labels. Confirm
	// against the live DOM.
	optionSelectors := []string{
		fmt.Sprintf(`[aria-label*="%s" i]`, displayName),
		fmt.Sprintf(`[data-value*="%s" i]`, displayName),
	}
	option, _, err := waitForFirstElement(bounded, optionSelectors, 3*time.Second)
	if err != nil {
		// Fall back to matching listbox options by their visible text.
		// Clicking an option that does not mention the requested model would
		// silently select the wrong one, so unmatched options are never used.
		option = modelOptionByText(bounded, displayName)
	}
	if option == nil {
		fmt.Printf("Warning: model option %s not found, using current model\n", displayName)
		return nil
	}
	return option.Click(proto.InputMouseButtonLeft, 1)
}

// SendPrompt types prompt into the page's input field and submits it.
//
// The input field is looked up with a list of candidate selectors for up to
// ten seconds; failing to find one is an error. The prompt is submitted by
// clicking a button that identifies itself as a send/submit control. If no
// such button shows up within a few seconds, the Enter key is pressed instead.
// Progress is logged to standard output.
func SendPrompt(page *rod.Page, prompt string) error {
	const (
		inputTimeout  = 10 * time.Second
		buttonTimeout = 3 * time.Second
	)

	fmt.Printf("[GeminiWeb] Finding input field...\n")
	// Possible selectors for the prompt input.
	inputSelectors := []string{
		`textarea`,
		`[contenteditable="true"]`,
		`[role="textbox"]`,
		`div[contenteditable="true"]`,
		`div[role="textbox"]`,
		`.ql-editor`,
		`rich-textarea`,
	}
	textarea, sel, err := waitForFirstElement(page, inputSelectors, inputTimeout)
	if err != nil {
		return fmt.Errorf("input field not found after trying all selectors: %w", err)
	}
	fmt.Printf(" Found with: %s\n", sel)

	fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len([]rune(prompt)))
	if err := textarea.Input(prompt); err != nil {
		return fmt.Errorf("failed to input prompt: %w", err)
	}

	fmt.Printf("[GeminiWeb] Finding send button...\n")
	time.Sleep(500 * time.Millisecond)
	// Only selectors that identify a send/submit control are accepted. A
	// generic `button` selector would click whichever button comes first in
	// the document and would keep the Enter fallback below from ever running.
	btnSelectors := []string{
		`button[type="submit"]`,
		`button[aria-label*="Send" i]`,
		`button[aria-label*="submit" i]`,
	}
	sendBtn, sel, err := waitForFirstElement(page, btnSelectors, buttonTimeout)
	if err != nil {
		// Fall back to submitting with the keyboard.
		// NOTE(review): confirm that '\n' is a key rod's keyboard accepts as
		// Enter in the rod version this project pins.
		fmt.Printf("[GeminiWeb] No send button found, trying Enter key...\n")
		if err := page.Keyboard.Press('\n'); err != nil {
			return fmt.Errorf("failed to press Enter: %w", err)
		}
		return nil
	}

	// The label and text are read for logging only, so lookup failures are
	// ignored and simply logged as empty.
	label := ""
	if ariaLabel, err := sendBtn.Attribute("aria-label"); err == nil && ariaLabel != nil {
		label = *ariaLabel
	}
	text, _ := sendBtn.Text()
	fmt.Printf(" Found send button with: %s (aria-label=%q text=%s)\n", sel, label, truncate(text, 20))

	fmt.Printf("[GeminiWeb] Clicking send button...\n")
	return sendBtn.Click(proto.InputMouseButtonLeft, 1)
}
// truncate shortens s to at most limit characters (runes) and appends "..."
// when anything was cut off. Strings that already fit are returned unchanged.
// A negative limit is treated as zero.
//
// Cutting happens on rune boundaries, so a multi-byte UTF-8 character is never
// split in half.
func truncate(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	seen := 0
	// Ranging over a string yields the byte offset of each rune, which is the
	// safe place to cut once limit runes have been counted.
	for offset := range s {
		if seen == limit {
			return s[:offset] + "..."
		}
		seen++
	}
	return s
}
// latestElementText returns the trimmed text of the last element on page that
// matches selector. ok is false when nothing matches, the text cannot be read,
// or the text is empty. The query is a single non-waiting lookup.
func latestElementText(page *rod.Page, selector string) (text string, ok bool) {
	els, err := page.Elements(selector)
	if err != nil || len(els) == 0 {
		return "", false
	}
	raw, err := els[len(els)-1].Text()
	if err != nil {
		return "", false
	}
	text = strings.TrimSpace(raw)
	return text, text != ""
}

// appendedText returns the part of current that follows previous when current
// extends it, and all of current otherwise (the text was replaced rather than
// appended to).
func appendedText(previous, current string) string {
	if strings.HasPrefix(current, previous) {
		return current[len(previous):]
	}
	return current
}

// hasElement reports whether at least one element on page matches selector
// right now. It never waits.
func hasElement(page *rod.Page, selector string) bool {
	els, err := page.Elements(selector)
	return err == nil && len(els) > 0
}

// WaitForResponse polls page every 500ms and streams the model's output to the
// callbacks until the response is finished.
//
//   - onChunk receives newly appended response text.
//   - onThinking receives newly appended "thinking" text.
//   - onComplete is called once, right before a successful return.
//
// Any callback may be nil. The response counts as finished when some response
// text has been seen, nothing changed during the current poll, no stop button
// is shown, and a regenerate button is present. If that does not happen within
// 120 seconds, a "response timeout" error is returned.
//
// NOTE(review): only the last element matching each selector is followed, on
// the assumption that earlier matches belong to earlier turns of the
// conversation. Confirm against the live DOM.
func WaitForResponse(page *rod.Page, onChunk func(text string), onThinking func(thinking string), onComplete func()) error {
	const (
		responseSelector   = `.response-text, message-content, .model-response, div[data-test-id="response"]`
		thinkingSelector   = `.thinking-content, .thought-text, div[data-test-id="thinking"]`
		stopSelector       = `button[aria-label*="stop" i]`
		regenerateSelector = `button[aria-label*="regenerate" i]`
	)

	lastText := ""
	lastThinking := ""

	timeout := time.NewTimer(120 * time.Second)
	defer timeout.Stop()
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-timeout.C:
			return fmt.Errorf("response timeout")
		case <-ticker.C:
			// Every lookup in this branch is non-waiting. A blocking lookup
			// here would stop the select from ever seeing the timeout fire.
			changed := false

			if text, ok := latestElementText(page, responseSelector); ok && text != lastText {
				if onChunk != nil {
					onChunk(appendedText(lastText, text))
				}
				lastText = text
				changed = true
			}

			if thinking, ok := latestElementText(page, thinkingSelector); ok && thinking != lastThinking {
				if onThinking != nil {
					onThinking(appendedText(lastThinking, thinking))
				}
				lastThinking = thinking
				changed = true
			}

			// Still streaming, or nothing has arrived yet.
			if changed || lastText == "" {
				continue
			}
			// A visible stop button means generation is still in progress.
			if hasElement(page, stopSelector) {
				continue
			}
			if hasElement(page, regenerateSelector) {
				if onComplete != nil {
					onComplete()
				}
				return nil
			}
		}
	}
}
// IsRateLimited reports whether page contains an element whose class mentions
// "rate-limit" or "quota", or whose data-test-id is "rate-limited". The lookup
// is given up to two seconds; any lookup failure counts as "not rate limited".
func IsRateLimited(page *rod.Page) bool {
	deadline, stop := context.WithTimeout(context.Background(), 2*time.Second)
	defer stop()

	banner, err := page.Context(deadline).Element(`[class*="rate-limit"], [class*="quota"], [data-test-id="rate-limited"]`)
	if err != nil {
		return false
	}
	return banner != nil
}
// GetRateLimitMessage returns the trimmed text of the first rate-limit, quota
// or error-message element found on page, or "" when none is found within two
// seconds or its text cannot be read.
func GetRateLimitMessage(page *rod.Page) string {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	// The selector list covers everything IsRateLimited accepts, including
	// the data-test-id marker, so that a detected rate limit can also have
	// its message read. The generic error-message class is kept as an extra
	// source.
	el, err := page.Context(ctx).Element(`[class*="rate-limit"], [class*="quota"], [data-test-id="rate-limited"], [class*="error-message"]`)
	if err != nil || el == nil {
		return ""
	}
	text, err := el.Text()
	if err != nil {
		return ""
	}
	return strings.TrimSpace(text)
}