fix: Improve Gemini Web DOM selectors and add debug output
- Make the model selector optional (skip it if not found and use the current model)
- Add multiple fallback selectors for the input field and the send button
- Add debug logging to trace the execution flow
- Improve error messages to suggest running gemini-login
This commit is contained in:
parent
19985dd476
commit
69df57555d
Binary file not shown.
|
|
@ -46,8 +46,21 @@ func IsLoggedIn(page *rod.Page) bool {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
_, err := page.Context(ctx).Element(`[aria-label*="New chat"], [data-test-id*="new-chat"], button[aria-label*="chat"]`)
|
// 嘗試多種可能的登入狀態指示器
|
||||||
return err == nil
|
selectors := []string{
|
||||||
|
`textarea`,
|
||||||
|
`[contenteditable="true"]`,
|
||||||
|
`[aria-label*="chat" i]`,
|
||||||
|
`button[aria-label*="new" i]`,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sel := range selectors {
|
||||||
|
_, err := page.Context(ctx).Element(sel)
|
||||||
|
if err == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func SelectModel(page *rod.Page, model string) error {
|
func SelectModel(page *rod.Page, model string) error {
|
||||||
|
|
@ -56,9 +69,36 @@ func SelectModel(page *rod.Page, model string) error {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
modelSwitcher, err := page.Context(ctx).Element(`button[aria-label*="model"], [data-test-id="model-selector"], button[aria-haspopup="listbox"]`)
|
// 嘗試多種可能的模型選擇器選擇器
|
||||||
|
selectors := []string{
|
||||||
|
`button[aria-label*="model" i]`,
|
||||||
|
`button[aria-label*="Model" i]`,
|
||||||
|
`[data-test-id="model-selector"]`,
|
||||||
|
`button[aria-haspopup="listbox"]`,
|
||||||
|
`[class*="model-selector"]`,
|
||||||
|
`[class*="model"] button`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var modelSwitcher *rod.Element
|
||||||
|
var err error
|
||||||
|
for _, sel := range selectors {
|
||||||
|
modelSwitcher, err = page.Context(ctx).Element(sel)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("model selector not found: %w", err)
|
// 如果找不到模型選擇器,可能是頁面已經在正確的模型上,或是 Gemini 的 UI 不同
|
||||||
|
fmt.Printf("Warning: model selector not found, using current model (requested: %s)\n", displayName)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// 獲取目前的模型文字
|
||||||
|
currentText, _ := modelSwitcher.Text()
|
||||||
|
if currentText != "" && strings.Contains(strings.ToLower(currentText), strings.ToLower(displayName)) {
|
||||||
|
// 已經在正確的模型上
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := modelSwitcher.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
if err := modelSwitcher.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||||||
|
|
@ -67,18 +107,52 @@ func SelectModel(page *rod.Page, model string) error {
|
||||||
|
|
||||||
time.Sleep(500 * time.Millisecond)
|
time.Sleep(500 * time.Millisecond)
|
||||||
|
|
||||||
option, err := page.Context(ctx).Element(fmt.Sprintf(`[aria-label*="%s"], [data-value="%s"]`, displayName, displayName))
|
// 嘗試多種可能的選項選擇器
|
||||||
|
optionSelectors := []string{
|
||||||
|
fmt.Sprintf(`[aria-label*="%s" i]`, displayName),
|
||||||
|
fmt.Sprintf(`[data-value*="%s" i]`, displayName),
|
||||||
|
fmt.Sprintf(`text=%s`, displayName),
|
||||||
|
`[role="option"]`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var option *rod.Element
|
||||||
|
for _, sel := range optionSelectors {
|
||||||
|
option, err = page.Context(ctx).Element(sel)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("model option %s not found: %w", displayName, err)
|
fmt.Printf("Warning: model option %s not found, using current model\n", displayName)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return option.Click(proto.InputMouseButtonLeft, 1)
|
return option.Click(proto.InputMouseButtonLeft, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func SendPrompt(page *rod.Page, prompt string) error {
|
func SendPrompt(page *rod.Page, prompt string) error {
|
||||||
textarea, err := page.Element(`textarea[aria-label*="message"], textarea[placeholder*="message"], rich-textarea, .ql-editor, div[contenteditable="true"]`)
|
// 嘗試多種可能的輸入框選擇器
|
||||||
|
selectors := []string{
|
||||||
|
`textarea[aria-label*="message" i]`,
|
||||||
|
`textarea[placeholder*="message" i]`,
|
||||||
|
`textarea`,
|
||||||
|
`[contenteditable="true"]`,
|
||||||
|
`[role="textbox"]`,
|
||||||
|
`rich-textarea`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var textarea *rod.Element
|
||||||
|
var err error
|
||||||
|
for _, sel := range selectors {
|
||||||
|
textarea, err = page.Element(sel)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("input field not found: %w", err)
|
return fmt.Errorf("input field not found after trying selectors %v: %w", selectors, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := textarea.Input(prompt); err != nil {
|
if err := textarea.Input(prompt); err != nil {
|
||||||
|
|
@ -87,7 +161,22 @@ func SendPrompt(page *rod.Page, prompt string) error {
|
||||||
|
|
||||||
time.Sleep(300 * time.Millisecond)
|
time.Sleep(300 * time.Millisecond)
|
||||||
|
|
||||||
sendBtn, err := page.Element(`button[aria-label*="Send"], button[aria-label*="submit"], button[type="submit"]`)
|
// 嘗試多種可能的發送按鈕選擇器
|
||||||
|
btnSelectors := []string{
|
||||||
|
`button[aria-label*="Send" i]`,
|
||||||
|
`button[aria-label*="submit" i]`,
|
||||||
|
`button[type="submit"]`,
|
||||||
|
`button svg`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var sendBtn *rod.Element
|
||||||
|
for _, sel := range btnSelectors {
|
||||||
|
sendBtn, err = page.Element(sel)
|
||||||
|
if err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("send button not found: %w", err)
|
return fmt.Errorf("send button not found: %w", err)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,15 +43,19 @@ func (p *Provider) initPool() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Provider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) error {
|
func (p *Provider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) error {
|
||||||
|
fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)
|
||||||
|
|
||||||
if err := p.initPool(); err != nil {
|
if err := p.initPool(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
session := p.pool.GetAvailable()
|
session := p.pool.GetAvailable()
|
||||||
if session == nil {
|
if session == nil {
|
||||||
return fmt.Errorf("no available sessions")
|
return fmt.Errorf("no available sessions - please run 'gemini-login <session-name>' first")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Printf("[GeminiWeb] Using session: %s\n", session.Name)
|
||||||
|
|
||||||
p.pool.StartSession(session)
|
p.pool.StartSession(session)
|
||||||
defer p.pool.EndSession(session)
|
defer p.pool.EndSession(session)
|
||||||
|
|
||||||
|
|
@ -67,34 +71,45 @@ func (p *Provider) Generate(ctx context.Context, model string, messages []apityp
|
||||||
}
|
}
|
||||||
|
|
||||||
if session.CookieFile != "" {
|
if session.CookieFile != "" {
|
||||||
|
fmt.Printf("[GeminiWeb] Loading cookies from: %s\n", session.CookieFile)
|
||||||
cookies, err := LoadCookiesFromFile(session.CookieFile)
|
cookies, err := LoadCookiesFromFile(session.CookieFile)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if err := SetCookiesOnPage(page, cookies); err != nil {
|
if err := SetCookiesOnPage(page, cookies); err != nil {
|
||||||
return fmt.Errorf("failed to set cookies: %w", err)
|
return fmt.Errorf("failed to set cookies: %w", err)
|
||||||
}
|
}
|
||||||
|
fmt.Printf("[GeminiWeb] Loaded %d cookies\n", len(cookies))
|
||||||
|
} else {
|
||||||
|
fmt.Printf("[GeminiWeb] Warning: could not load cookies: %v\n", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Printf("[GeminiWeb] Navigating to Gemini...\n")
|
||||||
if err := NavigateToGemini(page); err != nil {
|
if err := NavigateToGemini(page); err != nil {
|
||||||
return fmt.Errorf("failed to navigate: %w", err)
|
return fmt.Errorf("failed to navigate: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(2 * time.Second)
|
time.Sleep(2 * time.Second)
|
||||||
|
|
||||||
|
fmt.Printf("[GeminiWeb] Checking login status...\n")
|
||||||
if !IsLoggedIn(page) {
|
if !IsLoggedIn(page) {
|
||||||
return fmt.Errorf("session not logged in, please run gemini-login first")
|
return fmt.Errorf("session not logged in - please run 'gemini-login %s' first", session.Name)
|
||||||
}
|
}
|
||||||
|
fmt.Printf("[GeminiWeb] Logged in successfully\n")
|
||||||
|
|
||||||
|
fmt.Printf("[GeminiWeb] Selecting model: %s\n", model)
|
||||||
if err := SelectModel(page, model); err != nil {
|
if err := SelectModel(page, model); err != nil {
|
||||||
return fmt.Errorf("failed to select model: %w", err)
|
return fmt.Errorf("failed to select model: %w", err)
|
||||||
}
|
}
|
||||||
|
fmt.Printf("[GeminiWeb] Model selected\n")
|
||||||
|
|
||||||
time.Sleep(500 * time.Millisecond)
|
time.Sleep(500 * time.Millisecond)
|
||||||
|
|
||||||
prompt := buildPromptFromMessages(messages)
|
prompt := buildPromptFromMessages(messages)
|
||||||
|
fmt.Printf("[GeminiWeb] Sending prompt (length: %d chars)\n", len(prompt))
|
||||||
if err := SendPrompt(page, prompt); err != nil {
|
if err := SendPrompt(page, prompt); err != nil {
|
||||||
return fmt.Errorf("failed to send prompt: %w", err)
|
return fmt.Errorf("failed to send prompt: %w", err)
|
||||||
}
|
}
|
||||||
|
fmt.Printf("[GeminiWeb] Prompt sent, waiting for response...\n")
|
||||||
|
|
||||||
return WaitForResponse(page,
|
return WaitForResponse(page,
|
||||||
func(text string) {
|
func(text string) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,159 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"fmt"
	"os"
	"path/filepath"

	"cursor-api-proxy/internal/providers/geminiweb"

	"github.com/go-rod/rod"
	"github.com/go-rod/rod/lib/launcher"
	"github.com/go-rod/rod/lib/proto"
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
fmt.Println("Starting Gemini DOM detection...")
|
||||||
|
fmt.Println("This will open a browser and analyze the Gemini web interface.")
|
||||||
|
fmt.Println()
|
||||||
|
|
||||||
|
// 啟動可見瀏覽器
|
||||||
|
l := launcher.New().Headless(false)
|
||||||
|
url, err := l.Launch()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to launch browser: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
browser := rod.New().ControlURL(url)
|
||||||
|
if err := browser.Connect(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to connect browser: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer browser.Close()
|
||||||
|
|
||||||
|
page, err := browser.Page(proto.TargetCreateTarget{URL: "about:blank"})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to create page: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 載入 cookies(如果有)
|
||||||
|
home, _ := os.UserHomeDir()
|
||||||
|
cookieFile := home + "/.cursor-api-proxy/gemini-accounts/session-1/cookies.json"
|
||||||
|
if _, err := os.Stat(cookieFile); err == nil {
|
||||||
|
cookies, err := geminiweb.LoadCookiesFromFile(cookieFile)
|
||||||
|
if err == nil {
|
||||||
|
geminiweb.SetCookiesOnPage(page, cookies)
|
||||||
|
fmt.Println("Loaded existing cookies")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 導航到 Gemini
|
||||||
|
fmt.Println("Navigating to gemini.google.com...")
|
||||||
|
if err := geminiweb.NavigateToGemini(page); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to navigate: %v\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("Browser is now open. Please:")
|
||||||
|
fmt.Println("1. Log in if needed")
|
||||||
|
fmt.Println("2. Wait for the chat interface to fully load")
|
||||||
|
fmt.Println("3. Look for the model selector dropdown")
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("Press Enter to analyze the DOM...")
|
||||||
|
fmt.Scanln()
|
||||||
|
|
||||||
|
// 分析 DOM
|
||||||
|
analyzeDOM(page)
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("Press Enter to close...")
|
||||||
|
fmt.Scanln()
|
||||||
|
}
|
||||||
|
|
||||||
|
func analyzeDOM(page *rod.Page) {
|
||||||
|
fmt.Println("=== DOM Analysis ===")
|
||||||
|
fmt.Println()
|
||||||
|
|
||||||
|
// 尋找可能的輸入框
|
||||||
|
fmt.Println("Looking for input fields...")
|
||||||
|
selectors := []string{
|
||||||
|
`textarea`,
|
||||||
|
`[contenteditable="true"]`,
|
||||||
|
`[role="textbox"]`,
|
||||||
|
`input[type="text"]`,
|
||||||
|
}
|
||||||
|
for _, sel := range selectors {
|
||||||
|
elements, err := page.Elements(sel)
|
||||||
|
if err == nil && len(elements) > 0 {
|
||||||
|
fmt.Printf(" Found %d elements with: %s\n", len(elements), sel)
|
||||||
|
for i, el := range elements {
|
||||||
|
tag, _ := el.Property("tagName")
|
||||||
|
class, _ := el.Attribute("class")
|
||||||
|
ariaLabel, _ := el.Attribute("aria-label")
|
||||||
|
placeholder, _ := el.Attribute("placeholder")
|
||||||
|
fmt.Printf(" [%d] tag=%s class=%s aria-label=%s placeholder=%s\n",
|
||||||
|
i, tag, class, ariaLabel, placeholder)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 尋找可能的發送按鈕
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("Looking for send buttons...")
|
||||||
|
buttonSelectors := []string{
|
||||||
|
`button`,
|
||||||
|
`[role="button"]`,
|
||||||
|
`[type="submit"]`,
|
||||||
|
}
|
||||||
|
for _, sel := range buttonSelectors {
|
||||||
|
elements, err := page.Elements(sel)
|
||||||
|
if err == nil && len(elements) > 0 {
|
||||||
|
fmt.Printf(" Found %d elements with: %s\n", len(elements), sel)
|
||||||
|
for i, el := range elements {
|
||||||
|
if i >= 5 {
|
||||||
|
fmt.Printf(" ... and %d more\n", len(elements)-5)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
tag, _ := el.Property("tagName")
|
||||||
|
class, _ := el.Attribute("class")
|
||||||
|
ariaLabel, _ := el.Attribute("aria-label")
|
||||||
|
text, _ := el.Text()
|
||||||
|
text = truncate(text, 30)
|
||||||
|
fmt.Printf(" [%d] tag=%s class=%s aria-label=%s text=%s\n",
|
||||||
|
i, tag, class, ariaLabel, text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 尋找模型選擇器
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("Looking for model selector...")
|
||||||
|
modelSelectors := []string{
|
||||||
|
`[aria-label*="model"]`,
|
||||||
|
`[aria-label*="Model"]`,
|
||||||
|
`button[aria-haspopup]`,
|
||||||
|
`[data-test-id*="model"]`,
|
||||||
|
`[class*="model"]`,
|
||||||
|
}
|
||||||
|
for _, sel := range modelSelectors {
|
||||||
|
elements, err := page.Elements(sel)
|
||||||
|
if err == nil && len(elements) > 0 {
|
||||||
|
fmt.Printf(" Found with: %s\n", sel)
|
||||||
|
for i, el := range elements {
|
||||||
|
tag, _ := el.Property("tagName")
|
||||||
|
class, _ := el.Attribute("class")
|
||||||
|
ariaLabel, _ := el.Attribute("aria-label")
|
||||||
|
text, _ := el.Text()
|
||||||
|
fmt.Printf(" [%d] tag=%s class=%s aria-label=%s text=%s\n",
|
||||||
|
i, tag, class, ariaLabel, truncate(text, 30))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// truncate shortens s to at most max characters (runes) and appends "..."
// when anything was cut off. Strings that already fit are returned unchanged.
//
// The cut is always made on a rune boundary, so multi-byte UTF-8 text is never
// split mid-character. A negative max is treated as 0 instead of panicking.
func truncate(s string, max int) string {
	if max < 0 {
		max = 0
	}
	// Fast path: a string of at most max bytes has at most max runes.
	if len(s) <= max {
		return s
	}

	// Ranging over a string yields the byte offset at which each rune starts,
	// so the offset of rune number max is a safe place to cut.
	seen := 0
	for offset := range s {
		if seen == max {
			return s[:offset] + "..."
		}
		seen++
	}
	// Fewer than or exactly max runes: nothing to cut.
	return s
}
|
||||||
Loading…
Reference in New Issue