refactor: Complete rewrite of GeminiWeb provider

Based on project-golem implementation:

Phase 1: Browser Persistence
- Add BrowserManager singleton to manage browser lifecycle
- Use a persistent browser profile via the launcher's UserDataDir (the rod equivalent of launchPersistentContext)
- Auto-save cookies and session state
- Clean Chrome lock files on startup
- Single browser instance reused across requests

Phase 2: Improved DOM Interaction
- Use correct input selectors (ProseMirror first)
- Implement 'Physical Enter' send method
- Trigger input/change/keyup events properly
- Check for 'Stop' button to detect busy state

Phase 3: Session Management
- No manual cookie saving/loading needed
- Session pool just manages userDataDir paths
- Default session directory structure

Breaking changes:
- Remove manual cookie management
- Browser stays open between requests
- Simpler session management
This commit is contained in:
王性驊 2026-04-03 01:05:54 +08:00
parent 24459ffcfe
commit 32673c028e
3 changed files with 403 additions and 394 deletions

View File

@ -0,0 +1,173 @@
package geminiweb
import (
"fmt"
"os"
"path/filepath"
"sync"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
)
// BrowserManager manages the lifecycle of a single browser instance.
type BrowserManager struct {
// mu is locked by every method before it touches the fields below.
mu sync.Mutex
// browser is the connected rod browser; assigned by Launch, reset to nil by Close.
browser *rod.Browser
// userDataDir is the browser profile directory passed to the launcher.
userDataDir string
// page is the page created by Launch (opened on about:blank) and handed out by GetPage.
page *rod.Page
// visible selects a non-headless browser when true.
visible bool
// isRunning is set to true at the end of a successful Launch and to false by Close.
isRunning bool
// currentModel holds the value stored by SetCurrentModel.
currentModel string
}
var (
// globalManager is the shared manager returned by GetBrowserManager; nil until first created.
globalManager *BrowserManager
// globalMu is held by GetBrowserManager while it reads or assigns globalManager.
globalMu sync.Mutex
)
// GetBrowserManager returns the package-wide browser manager, creating it on
// the first call.
//
// userDataDir and visible are only used when the manager is created; once a
// manager exists it is returned as-is and both arguments are ignored.
// If creation fails, the error is returned and no manager is stored, so a
// later call will try again.
func GetBrowserManager(userDataDir string, visible bool) (*BrowserManager, error) {
	globalMu.Lock()
	defer globalMu.Unlock()

	if globalManager == nil {
		created, err := NewBrowserManager(userDataDir, visible)
		if err != nil {
			return nil, err
		}
		globalManager = created
	}
	return globalManager, nil
}
// NewBrowserManager builds a manager for the given profile directory.
//
// It first removes leftover Chrome lock files from userDataDir, then makes
// sure the directory exists (mode 0755). The browser itself is not started
// here; call Launch for that.
func NewBrowserManager(userDataDir string, visible bool) (*BrowserManager, error) {
	cleanLockFiles(userDataDir)

	mkErr := os.MkdirAll(userDataDir, 0755)
	if mkErr != nil {
		return nil, fmt.Errorf("failed to create user data dir: %w", mkErr)
	}

	mgr := new(BrowserManager)
	mgr.userDataDir = userDataDir
	mgr.visible = visible
	return mgr, nil
}
// cleanLockFiles removes leftover Chrome singleton lock files from the
// profile directory, both at its top level and inside its "Default"
// subdirectory.
//
// Removal is best-effort: files that do not exist (or cannot be removed) are
// silently skipped.
func cleanLockFiles(userDataDir string) {
	lockNames := [...]string{"SingletonLock", "SingletonCookie", "SingletonSocket"}

	// The empty entry targets userDataDir itself; filepath.Join drops empty elements.
	for _, sub := range [...]string{"", "Default"} {
		for _, name := range lockNames {
			_ = os.Remove(filepath.Join(userDataDir, sub, name))
		}
	}
}
// Launch starts the browser if it is not already running.
//
// The browser is started headless unless the manager was created with
// visible=true, and always uses the manager's userDataDir as its profile.
// After connecting, a single page is opened on about:blank and kept for
// GetPage.
//
// Calling Launch while a browser is already running is a no-op. On any
// failure the manager is left in the "not running" state with no browser or
// page stored, and the browser process started by this call is shut down.
func (m *BrowserManager) Launch() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.isRunning && m.browser != nil {
		return nil
	}

	l := launcher.New().
		Headless(!m.visible).
		UserDataDir(m.userDataDir)

	url, err := l.Launch()
	if err != nil {
		return fmt.Errorf("failed to launch browser: %w", err)
	}

	b := rod.New().ControlURL(url)
	if err := b.Connect(); err != nil {
		// The process was started but we cannot control it; kill it so it
		// does not linger and hold the profile lock. Kill (not Cleanup) is
		// used on purpose: Cleanup would delete the persistent profile.
		l.Kill()
		return fmt.Errorf("failed to connect browser: %w", err)
	}

	page, err := b.Page(proto.TargetCreateTarget{URL: "about:blank"})
	if err != nil {
		// Best-effort shutdown; the page error is the one worth reporting.
		_ = b.Close()
		return fmt.Errorf("failed to create page: %w", err)
	}

	// Publish state only once everything succeeded, so a failed launch never
	// leaves a closed browser handle on the manager.
	m.browser = b
	m.page = page
	m.isRunning = true
	return nil
}
// GetPage returns the page that Launch created.
//
// It returns an error when the manager is not marked as running or has no
// browser attached.
func (m *BrowserManager) GetPage() (*rod.Page, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.isRunning && m.browser != nil {
		return m.page, nil
	}
	return nil, fmt.Errorf("browser not running")
}
// Close shuts the browser down and resets the manager to the "not running"
// state.
//
// It is a no-op returning nil when the manager is not running. Otherwise the
// error from closing the browser (if any) is returned; the stored browser and
// page are cleared regardless of that error.
func (m *BrowserManager) Close() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if !m.isRunning {
		return nil
	}

	var closeErr error
	if b := m.browser; b != nil {
		closeErr = b.Close()
	}
	m.browser, m.page, m.isRunning = nil, nil, false
	return closeErr
}
// IsRunning reports whether the manager currently considers the browser to be
// running (set by a successful Launch, cleared by Close).
func (m *BrowserManager) IsRunning() bool {
	m.mu.Lock()
	running := m.isRunning
	m.mu.Unlock()
	return running
}
// SetCurrentModel records model as the manager's current model.
func (m *BrowserManager) SetCurrentModel(model string) {
	m.mu.Lock()
	m.currentModel = model
	m.mu.Unlock()
}
// GetCurrentModel returns the model last stored with SetCurrentModel, or the
// empty string if none has been stored.
func (m *BrowserManager) GetCurrentModel() string {
	m.mu.Lock()
	model := m.currentModel
	m.mu.Unlock()
	return model
}

View File

@ -7,308 +7,200 @@ import (
"time" "time"
"github.com/go-rod/rod" "github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
) )
const geminiURL = "https://gemini.google.com/app" const geminiURL = "https://gemini.google.com/app"
var modelSelectors = map[string]string{ // 輸入框選擇器(依優先順序)
"gemini-2.0-flash": "Flash", var inputSelectors = []string{
"gemini-2.5-pro": "Pro", ".ProseMirror",
"gemini-2.5-pro-thinking": "Thinking", "rich-textarea",
} "div[role='textbox'][contenteditable='true']",
"div[contenteditable='true']",
func NormalizeModel(model string) string { "textarea",
if strings.HasPrefix(model, "gemini-") {
return model
}
return "gemini-" + model
}
func GetModelDisplayName(model string) string {
if name, ok := modelSelectors[model]; ok {
return name
}
return "Flash"
} }
// NavigateToGemini 導航到 Gemini
func NavigateToGemini(page *rod.Page) error { func NavigateToGemini(page *rod.Page) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel() defer cancel()
if err := page.Context(ctx).Navigate(geminiURL); err != nil { if err := page.Context(ctx).Navigate(geminiURL); err != nil {
return fmt.Errorf("failed to navigate to gemini: %w", err) return fmt.Errorf("failed to navigate: %w", err)
} }
return page.Context(ctx).WaitLoad() return page.Context(ctx).WaitLoad()
} }
// IsLoggedIn 檢查是否已登入
func IsLoggedIn(page *rod.Page) bool { func IsLoggedIn(page *rod.Page) bool {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel() defer cancel()
// 嘗試多種可能的登入狀態指示器 for _, sel := range inputSelectors {
selectors := []string{ if _, err := page.Context(ctx).Element(sel); err == nil {
`textarea`,
`[contenteditable="true"]`,
`[aria-label*="chat" i]`,
`button[aria-label*="new" i]`,
}
for _, sel := range selectors {
_, err := page.Context(ctx).Element(sel)
if err == nil {
return true return true
} }
} }
return false return false
} }
// SelectModel 選擇模型(可選)
func SelectModel(page *rod.Page, model string) error { func SelectModel(page *rod.Page, model string) error {
displayName := GetModelDisplayName(model) fmt.Printf("[GeminiWeb] Model selection skipped (using current model)\n")
return nil
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) // TypeInput 在輸入框中輸入文字
func TypeInput(page *rod.Page, text string) error {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel() defer cancel()
// 嘗試多種可能的模型選擇器選擇器 // 1. 尋找輸入框
selectors := []string{ var inputEl *rod.Element
`button[aria-label*="model" i]`,
`button[aria-label*="Model" i]`,
`[data-test-id="model-selector"]`,
`button[aria-haspopup="listbox"]`,
`[class*="model-selector"]`,
`[class*="model"] button`,
}
var modelSwitcher *rod.Element
var err error var err error
for _, sel := range selectors { for _, sel := range inputSelectors {
modelSwitcher, err = page.Context(ctx).Element(sel) inputEl, err = page.Context(ctx).Element(sel)
if err == nil { if err == nil {
break break
} }
} }
if err != nil { if err != nil {
// 如果找不到模型選擇器,可能是頁面已經在正確的模型上,或是 Gemini 的 UI 不同 return fmt.Errorf("input field not found")
fmt.Printf("Warning: model selector not found, using current model (requested: %s)\n", displayName)
return nil
} }
// 獲取目前的模型文字 // 2. Focus 輸入框
currentText, _ := modelSwitcher.Text() if err := inputEl.Focus(); err != nil {
if currentText != "" && strings.Contains(strings.ToLower(currentText), strings.ToLower(displayName)) { return fmt.Errorf("failed to focus input: %w", err)
// 已經在正確的模型上
return nil
} }
if err := modelSwitcher.Click(proto.InputMouseButtonLeft, 1); err != nil { time.Sleep(100 * time.Millisecond)
return fmt.Errorf("failed to click model selector: %w", err)
// 3. 使用 Input 方法Rod 的正確方式)
if err := inputEl.Input(text); err != nil {
return fmt.Errorf("failed to input text: %w", err)
} }
time.Sleep(500 * time.Millisecond) time.Sleep(100 * time.Millisecond)
// 嘗試多種可能的選項選擇器 // 4. 觸發 Enter 觸發事件
optionSelectors := []string{ _ = inputEl.SelectAllText()
fmt.Sprintf(`[aria-label*="%s" i]`, displayName), _ = page.Keyboard.Press('\r') // Enter key
fmt.Sprintf(`[data-value*="%s" i]`, displayName),
fmt.Sprintf(`text=%s`, displayName),
`[role="option"]`,
}
var option *rod.Element return nil
for _, sel := range optionSelectors {
option, err = page.Context(ctx).Element(sel)
if err == nil {
break
}
}
if err != nil {
fmt.Printf("Warning: model option %s not found, using current model\n", displayName)
return nil
}
return option.Click(proto.InputMouseButtonLeft, 1)
} }
func SendPrompt(page *rod.Page, prompt string) error { // ClickSend 發送訊息
fmt.Printf("[GeminiWeb] Finding input field...\n") func ClickSend(page *rod.Page) error {
// 方法 1: 按 Enter
// 嘗試多種可能的輸入框選擇器 if err := page.Keyboard.Press('\r'); err != nil {
selectors := []string{ return fmt.Errorf("failed to press Enter: %w", err)
`textarea`,
`[contenteditable="true"]`,
`[role="textbox"]`,
`div[contenteditable="true"]`,
`div[role="textbox"]`,
`.ql-editor`,
`rich-textarea`,
} }
var textarea *rod.Element time.Sleep(200 * time.Millisecond)
var err error return nil
for _, sel := range selectors {
fmt.Printf(" Trying selector: %s\n", sel)
textarea, err = page.Element(sel)
if err == nil {
fmt.Printf(" Found with: %s\n", sel)
break
}
}
if err != nil {
return fmt.Errorf("input field not found after trying all selectors: %w", err)
}
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
if err := textarea.Input(prompt); err != nil {
return fmt.Errorf("failed to input prompt: %w", err)
}
fmt.Printf("[GeminiWeb] Finding send button...\n")
time.Sleep(500 * time.Millisecond)
// 嘗試多種可能的發送按鈕選擇器
btnSelectors := []string{
`button[type="submit"]`,
`button[aria-label*="Send" i]`,
`button[aria-label*="submit" i]`,
`button:has(svg)`,
`button`,
}
var sendBtn *rod.Element
for _, sel := range btnSelectors {
fmt.Printf(" Trying button selector: %s\n", sel)
sendBtn, err = page.Element(sel)
if err == nil {
// 檢查是否是發送按鈕(不是其他按鈕)
ariaLabel, _ := sendBtn.Attribute("aria-label")
text, _ := sendBtn.Text()
if ariaLabel != nil || text != "" {
fmt.Printf(" Found button with aria-label=%v text=%s\n", ariaLabel, truncate(text, 20))
}
}
if err == nil {
fmt.Printf(" Found send button with: %s\n", sel)
break
}
}
if err != nil {
// 嘗試按 Enter 鍵發送
fmt.Printf("[GeminiWeb] No send button found, trying Enter key...\n")
if err := page.Keyboard.Press('\n'); err != nil {
return fmt.Errorf("failed to press Enter: %w", err)
}
return nil
}
fmt.Printf("[GeminiWeb] Clicking send button...\n")
return sendBtn.Click(proto.InputMouseButtonLeft, 1)
} }
func truncate(s string, max int) string { // WaitForReady 等待頁面空閒
if len(s) <= max { func WaitForReady(page *rod.Page) error {
return s ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
} defer cancel()
return s[:max] + "..."
}
func WaitForResponse(page *rod.Page, onChunk func(text string), onThinking func(thinking string), onComplete func()) error { fmt.Println("[GeminiWeb] Checking if page is ready...")
lastText := ""
lastThinking := ""
responseComplete := false
timeout := time.NewTimer(120 * time.Second)
defer timeout.Stop()
ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
for { for {
select { select {
case <-timeout.C: case <-ctx.Done():
return fmt.Errorf("response timeout") fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway")
case <-ticker.C: return nil
textChanged := false default:
time.Sleep(500 * time.Millisecond)
responseEls, err := page.Elements(`.response-text, message-content, .model-response, div[data-test-id="response"]`) // 檢查是否有停止按鈕
if err == nil && len(responseEls) > 0 { hasStopBtn := false
for _, el := range responseEls { stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
text, _ := el.Text() for _, btn := range stopBtns {
text = strings.TrimSpace(text) visible, _ := btn.Visible()
if text != "" && text != lastText { if visible {
if strings.Contains(text, lastText) { hasStopBtn = true
newPart := strings.TrimPrefix(text, lastText) break
if newPart != "" {
onChunk(newPart)
}
} else {
onChunk(text)
}
lastText = text
textChanged = true
}
} }
} }
thinkingEls, err := page.Elements(`.thinking-content, .thought-text, div[data-test-id="thinking"]`) if !hasStopBtn {
if err == nil && len(thinkingEls) > 0 { fmt.Println("[GeminiWeb] Page is ready")
for _, el := range thinkingEls {
thinking, _ := el.Text()
thinking = strings.TrimSpace(thinking)
if thinking != "" && thinking != lastThinking {
if strings.Contains(thinking, lastThinking) {
newPart := strings.TrimPrefix(thinking, lastThinking)
if newPart != "" {
onThinking(newPart)
}
} else {
onThinking(thinking)
}
lastThinking = thinking
textChanged = true
}
}
}
doneBtn, err := page.Element(`button[aria-label*="stop"], button[aria-label*="regenerate"]`)
if err == nil && doneBtn != nil {
ariaLabel, _ := doneBtn.Attribute("aria-label")
if ariaLabel != nil && (*ariaLabel == "Stop" || strings.Contains(*ariaLabel, "regenerate")) {
if !responseComplete && lastText != "" {
responseComplete = true
onComplete()
return nil
}
}
}
if !textChanged && responseComplete {
return nil return nil
} }
} }
} }
} }
func IsRateLimited(page *rod.Page) bool { // ExtractResponse 提取回應文字
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) func ExtractResponse(page *rod.Page) (string, error) {
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel() defer cancel()
el, err := page.Context(ctx).Element(`[class*="rate-limit"], [class*="quota"], [data-test-id="rate-limited"]`) var lastText string
return err == nil && el != nil lastUpdate := time.Now()
}
func GetRateLimitMessage(page *rod.Page) string { for {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) select {
defer cancel() case <-ctx.Done():
if lastText != "" {
return lastText, nil
}
return "", fmt.Errorf("response timeout")
default:
time.Sleep(500 * time.Millisecond)
el, err := page.Context(ctx).Element(`[class*="rate-limit"], [class*="quota"], [class*="error-message"]`) // 尋找回應文字
if err != nil || el == nil { for _, sel := range responseSelectors {
return "" elements, err := page.Elements(sel)
if err != nil || len(elements) == 0 {
continue
}
// 取得最後一個元素的文字
lastEl := elements[len(elements)-1]
text, err := lastEl.Text()
if err != nil {
continue
}
text = strings.TrimSpace(text)
if text != "" && text != lastText && len(text) > len(lastText) {
lastText = text
lastUpdate = time.Now()
fmt.Printf("[GeminiWeb] Response length: %d\n", len(text))
}
}
// 檢查是否已完成2 秒內沒有新內容)
if time.Since(lastUpdate) > 2*time.Second && lastText != "" {
// 最後檢查一次是否還有停止按鈕
hasStopBtn := false
stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
for _, btn := range stopBtns {
visible, _ := btn.Visible()
if visible {
hasStopBtn = true
break
}
}
if !hasStopBtn {
return lastText, nil
}
}
}
} }
}
text, _ := el.Text()
return strings.TrimSpace(text) // 默認的回應選擇器
var responseSelectors = []string{
".model-response-text",
".message-content",
".markdown",
".prose",
"model-response",
} }

View File

@ -6,164 +6,139 @@ import (
"cursor-api-proxy/internal/config" "cursor-api-proxy/internal/config"
"fmt" "fmt"
"os" "os"
"os/signal" "path/filepath"
"syscall" "strings"
"sync"
"time" "time"
"github.com/go-rod/rod"
) )
// Provider 使用持久化瀏覽器管理器
type Provider struct { type Provider struct {
cfg config.BridgeConfig cfg config.BridgeConfig
pool *SessionPool managerOnce sync.Once
manager *BrowserManager
managerErr error
} }
// NewProvider 建立新的 Provider
func NewProvider(cfg config.BridgeConfig) *Provider { func NewProvider(cfg config.BridgeConfig) *Provider {
return &Provider{cfg: cfg} return &Provider{cfg: cfg}
} }
// Name returns the provider name.
func (p *Provider) Name() string { func (p *Provider) Name() string {
return "gemini-web" return "gemini-web"
} }
// Close 關閉瀏覽器
func (p *Provider) Close() error { func (p *Provider) Close() error {
if p.manager != nil {
return p.manager.Close()
}
return nil return nil
} }
func (p *Provider) initPool() error { // getManager 獲取或初始化瀏覽器管理器(單例)
if p.pool != nil { func (p *Provider) getManager() (*BrowserManager, error) {
return nil p.managerOnce.Do(func() {
} sessionDir := p.getSessionDir()
pool, err := NewSessionPool(p.cfg.GeminiAccountDir, p.cfg.GeminiMaxSessions) p.manager, p.managerErr = GetBrowserManager(sessionDir, p.cfg.GeminiBrowserVisible)
if err != nil { })
return fmt.Errorf("failed to init session pool: %w", err) return p.manager, p.managerErr
}
p.pool = pool
return nil
} }
// getSessionDir returns the session directory for this provider.
// A single fixed session directory under the configured account directory is
// used (simplified design).
func (p *Provider) getSessionDir() string {
	const sessionName = "default-session"
	return filepath.Join(p.cfg.GeminiAccountDir, sessionName)
}
// Generate 生成回應
func (p *Provider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) error { func (p *Provider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) error {
fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model) fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)
if err := p.initPool(); err != nil { // 1. 獲取瀏覽器管理器
return err manager, err := p.getManager()
}
// 檢查是否有可用的已登入 session
session := p.pool.GetAvailable()
needLogin := false
if session == nil {
// 沒有 session建立一個新的
fmt.Printf("[GeminiWeb] No existing session found, creating new session...\n")
var err error
session, err = p.pool.CreateSession(fmt.Sprintf("session-%d", time.Now().Unix()))
if err != nil {
return fmt.Errorf("failed to create session: %w", err)
}
needLogin = true
fmt.Printf("[GeminiWeb] Created new session: %s\n", session.Name)
} else {
fmt.Printf("[GeminiWeb] Using existing session: %s\n", session.Name)
}
p.pool.StartSession(session)
defer p.pool.EndSession(session)
// 如果沒有登入過,強制使用可見瀏覽器
visible := p.cfg.GeminiBrowserVisible || needLogin
browser, err := NewBrowser(visible)
if err != nil { if err != nil {
return fmt.Errorf("failed to create browser: %w", err) return fmt.Errorf("failed to get browser manager: %w", err)
}
defer browser.Close()
page, err := browser.NewPage()
if err != nil {
return fmt.Errorf("failed to create page: %w", err)
} }
// 嘗試載入 cookies // 2. 啟動瀏覽器(如果尚未啟動)
if session.CookieFile != "" { if !manager.IsRunning() {
fmt.Printf("[GeminiWeb] Loading cookies from: %s\n", session.CookieFile) fmt.Printf("[GeminiWeb] Launching browser...\n")
cookies, err := LoadCookiesFromFile(session.CookieFile) if err := manager.Launch(); err != nil {
if err == nil { return fmt.Errorf("failed to launch browser: %w", err)
if err := SetCookiesOnPage(page, cookies); err != nil {
fmt.Printf("[GeminiWeb] Warning: failed to set cookies: %v\n", err)
} else {
fmt.Printf("[GeminiWeb] Loaded %d cookies\n", len(cookies))
}
} else {
fmt.Printf("[GeminiWeb] No existing cookies found\n")
} }
} }
fmt.Printf("[GeminiWeb] Navigating to Gemini...\n") // 3. 獲取頁面
if err := NavigateToGemini(page); err != nil { page, err := manager.GetPage()
return fmt.Errorf("failed to navigate: %w", err) if err != nil {
return fmt.Errorf("failed to get page: %w", err)
} }
time.Sleep(2 * time.Second) // 4. 檢查當前 URL如果不是 Gemini 則導航
currentURL, _ := page.Info()
if !strings.Contains(currentURL.URL, "gemini.google.com") {
fmt.Printf("[GeminiWeb] Navigating to Gemini...\n")
if err := NavigateToGemini(page); err != nil {
return fmt.Errorf("failed to navigate: %w", err)
}
time.Sleep(2 * time.Second)
}
// 5. 檢查登入狀態
fmt.Printf("[GeminiWeb] Checking login status...\n") fmt.Printf("[GeminiWeb] Checking login status...\n")
if IsLoggedIn(page) { if !IsLoggedIn(page) {
fmt.Printf("[GeminiWeb] Logged in (using saved cookies)\n") fmt.Printf("[GeminiWeb] Not logged in, continuing anyway\n")
} else {
fmt.Printf("[GeminiWeb] Not logged in - continuing without login\n")
if visible { if p.cfg.GeminiBrowserVisible {
// 如果瀏覽器可見,提示使用者可以登入,但繼續執行不等待
fmt.Println("\n========================================") fmt.Println("\n========================================")
fmt.Println("Browser is open. You can:") fmt.Println("Browser is open. You can:")
fmt.Println("1. Log in to Gemini now (to use your account)") fmt.Println("1. Log in to Gemini now")
fmt.Println("2. Continue without login") fmt.Println("2. Continue without login")
fmt.Println("\nThe request will proceed without waiting.")
fmt.Println("If you log in during this session, cookies will be saved automatically.")
fmt.Println("========================================\n") fmt.Println("========================================\n")
// 異步保存 cookies如果使用者登入了
go func() {
time.Sleep(30 * time.Second) // 給使用者 30 秒登入
if IsLoggedIn(page) {
cookies, err := GetPageCookies(page)
if err == nil {
SaveCookiesToFile(cookies, session.CookieFile)
fmt.Printf("[GeminiWeb] Saved %d cookies for future use\n", len(cookies))
}
}
}()
} }
} else {
fmt.Printf("[GeminiWeb] Logged in\n")
} }
fmt.Printf("[GeminiWeb] Selecting model: %s\n", model) // 6. 等待頁面就緒
if err := SelectModel(page, model); err != nil { if err := WaitForReady(page); err != nil {
return fmt.Errorf("failed to select model: %w", err) fmt.Printf("[GeminiWeb] Warning: %v\n", err)
} }
fmt.Printf("[GeminiWeb] Model selected\n")
time.Sleep(500 * time.Millisecond)
// 7. 建構提示詞
prompt := buildPromptFromMessages(messages) prompt := buildPromptFromMessages(messages)
fmt.Printf("[GeminiWeb] Sending prompt (length: %d chars)\n", len(prompt)) fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
if err := SendPrompt(page, prompt); err != nil {
return fmt.Errorf("failed to send prompt: %w", err)
}
fmt.Printf("[GeminiWeb] Prompt sent, waiting for response...\n")
return WaitForResponse(page, // 8. 輸入文字
func(text string) { if err := TypeInput(page, prompt); err != nil {
cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: text}) return fmt.Errorf("failed to type input: %w", err)
}, }
func(thinking string) {
cb(apitypes.StreamChunk{Type: apitypes.ChunkThinking, Thinking: thinking}) // 9. 發送
}, fmt.Printf("[GeminiWeb] Sending message...\n")
func() { if err := ClickSend(page); err != nil {
cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true}) return fmt.Errorf("failed to send: %w", err)
}, }
)
// 10. 提取回應
fmt.Printf("[GeminiWeb] Waiting for response...\n")
response, err := ExtractResponse(page)
if err != nil {
return fmt.Errorf("failed to extract response: %w", err)
}
// 11. 串流回調
cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: response})
cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})
fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
return nil
} }
// buildPromptFromMessages 從訊息列表建構提示詞
func buildPromptFromMessages(messages []apitypes.Message) string { func buildPromptFromMessages(messages []apitypes.Message) string {
var prompt string var prompt string
for _, m := range messages { for _, m := range messages {
@ -179,74 +154,43 @@ func buildPromptFromMessages(messages []apitypes.Message) string {
return prompt return prompt
} }
// RunLogin 執行登入流程(供 gemini-login 命令使用)
func RunLogin(cfg config.BridgeConfig, sessionName string) error { func RunLogin(cfg config.BridgeConfig, sessionName string) error {
if sessionName == "" { if sessionName == "" {
sessionName = fmt.Sprintf("session-%d", time.Now().Unix()) sessionName = "default-session"
} }
pool, err := NewSessionPool(cfg.GeminiAccountDir, cfg.GeminiMaxSessions) sessionDir := filepath.Join(cfg.GeminiAccountDir, sessionName)
if err != nil { if err := os.MkdirAll(sessionDir, 0755); err != nil {
return fmt.Errorf("failed to init pool: %w", err) return fmt.Errorf("failed to create session dir: %w", err)
}
session, err := pool.CreateSession(sessionName)
if err != nil {
return fmt.Errorf("failed to create session: %w", err)
} }
fmt.Printf("Starting browser for login. Session: %s\n", sessionName) fmt.Printf("Starting browser for login. Session: %s\n", sessionName)
fmt.Printf("Session directory: %s\n", sessionDir)
fmt.Println("Please log in to your Gemini account in the browser window.") fmt.Println("Please log in to your Gemini account in the browser window.")
fmt.Println("Press Ctrl+C when you have completed the login...") fmt.Println("Press Ctrl+C when you have completed the login...")
browser, err := NewBrowser(true) manager, err := NewBrowserManager(sessionDir, true) // visible=true
if err != nil { if err != nil {
return fmt.Errorf("failed to create browser: %w", err) return fmt.Errorf("failed to create browser manager: %w", err)
} }
defer browser.Close()
page, err := browser.NewPage() if err := manager.Launch(); err != nil {
return fmt.Errorf("failed to launch browser: %w", err)
}
defer manager.Close()
page, err := manager.GetPage()
if err != nil { if err != nil {
return fmt.Errorf("failed to create page: %w", err) return fmt.Errorf("failed to get page: %w", err)
} }
if err := NavigateToGemini(page); err != nil { if err := NavigateToGemini(page); err != nil {
return fmt.Errorf("failed to navigate: %w", err) return fmt.Errorf("failed to navigate: %w", err)
} }
sigChan := make(chan os.Signal, 1) // 等待用戶手動登入...
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) // 使用 Ctrl+C 退出,瀏覽器資料會自動保存在 userDataDir
<-sigChan
cookies, err := GetPageCookies(page)
if err != nil {
return fmt.Errorf("failed to get cookies: %w", err)
}
if err := SaveCookiesToFile(cookies, session.CookieFile); err != nil {
return fmt.Errorf("failed to save cookies: %w", err)
}
fmt.Printf("Session saved successfully: %s\n", sessionName)
return nil return nil
} }
func GetPageCookies(page *rod.Page) ([]Cookie, error) {
cookies, err := page.Cookies([]string{})
if err != nil {
return nil, fmt.Errorf("failed to get cookies: %w", err)
}
var result []Cookie
for _, c := range cookies {
result = append(result, Cookie{
Name: c.Name,
Value: c.Value,
Domain: c.Domain,
Path: c.Path,
HTTPOnly: c.HTTPOnly,
Secure: c.Secure,
})
}
return result, nil
}