refactor: Complete rewrite of GeminiWeb provider
Based on the project-golem implementation.

Phase 1: Browser Persistence
- Add a BrowserManager singleton to manage the browser lifecycle
- Use launchPersistentContext with UserDataDir
- Auto-save cookies and session state
- Clean Chrome lock files on startup
- Reuse a single browser instance across requests

Phase 2: Improved DOM Interaction
- Use the correct input selectors (ProseMirror first)
- Implement the 'Physical Enter' send method
- Trigger input/change/keyup events properly
- Check for the 'Stop' button to detect a busy state

Phase 3: Session Management
- No manual cookie saving/loading needed
- Session pool just manages userDataDir paths
- Default session directory structure

Breaking changes:
- Remove manual cookie management
- Browser stays open between requests
- Simpler session management
This commit is contained in:
parent
24459ffcfe
commit
32673c028e
|
|
@ -0,0 +1,173 @@
|
|||
package geminiweb
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
"github.com/go-rod/rod/lib/launcher"
|
||||
"github.com/go-rod/rod/lib/proto"
|
||||
)
|
||||
|
||||
// BrowserManager 管理瀏覽器實例的生命週期
|
||||
type BrowserManager struct {
|
||||
mu sync.Mutex
|
||||
browser *rod.Browser
|
||||
userDataDir string
|
||||
page *rod.Page
|
||||
visible bool
|
||||
isRunning bool
|
||||
currentModel string
|
||||
}
|
||||
|
||||
var (
|
||||
globalManager *BrowserManager
|
||||
globalMu sync.Mutex
|
||||
)
|
||||
|
||||
// GetBrowserManager 獲取全域瀏覽器管理器(單例)
|
||||
func GetBrowserManager(userDataDir string, visible bool) (*BrowserManager, error) {
|
||||
globalMu.Lock()
|
||||
defer globalMu.Unlock()
|
||||
|
||||
if globalManager != nil {
|
||||
return globalManager, nil
|
||||
}
|
||||
|
||||
manager, err := NewBrowserManager(userDataDir, visible)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
globalManager = manager
|
||||
return globalManager, nil
|
||||
}
|
||||
|
||||
// NewBrowserManager 建立新的瀏覽器管理器
|
||||
func NewBrowserManager(userDataDir string, visible bool) (*BrowserManager, error) {
|
||||
cleanLockFiles(userDataDir)
|
||||
|
||||
if err := os.MkdirAll(userDataDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create user data dir: %w", err)
|
||||
}
|
||||
|
||||
return &BrowserManager{
|
||||
userDataDir: userDataDir,
|
||||
visible: visible,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// cleanLockFiles removes the Singleton* lock files that a previous browser
// run may have left in userDataDir (both at the top level and under
// "Default").
//
// Removal is best effort: a file that does not exist is the normal case and
// is ignored, and any other failure is reported as a warning rather than
// returned. An empty userDataDir is a no-op, because joining an empty
// directory with the file names would otherwise address files relative to
// the process working directory.
func cleanLockFiles(userDataDir string) {
	if userDataDir == "" {
		return
	}

	lockFiles := []string{
		"SingletonLock",
		"SingletonCookie",
		"SingletonSocket",
		"Default/SingletonLock",
		"Default/SingletonCookie",
		"Default/SingletonSocket",
	}

	for _, name := range lockFiles {
		path := filepath.Join(userDataDir, name)
		if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
			// A lock that cannot be removed is worth surfacing, since it may
			// block the subsequent launch, but it must not abort start-up.
			fmt.Printf("[GeminiWeb] Warning: failed to remove lock file %s: %v\n", path, err)
		}
	}
}
|
||||
|
||||
// Launch 啟動瀏覽器(如果尚未啟動)
|
||||
func (m *BrowserManager) Launch() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if m.isRunning && m.browser != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
l := launcher.New()
|
||||
|
||||
if m.visible {
|
||||
l = l.Headless(false)
|
||||
} else {
|
||||
l = l.Headless(true)
|
||||
}
|
||||
|
||||
l = l.UserDataDir(m.userDataDir)
|
||||
|
||||
url, err := l.Launch()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to launch browser: %w", err)
|
||||
}
|
||||
|
||||
b := rod.New().ControlURL(url)
|
||||
if err := b.Connect(); err != nil {
|
||||
return fmt.Errorf("failed to connect browser: %w", err)
|
||||
}
|
||||
|
||||
m.browser = b
|
||||
|
||||
page, err := b.Page(proto.TargetCreateTarget{URL: "about:blank"})
|
||||
if err != nil {
|
||||
_ = b.Close()
|
||||
return fmt.Errorf("failed to create page: %w", err)
|
||||
}
|
||||
|
||||
m.page = page
|
||||
m.isRunning = true
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetPage 獲取頁面
|
||||
func (m *BrowserManager) GetPage() (*rod.Page, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if !m.isRunning || m.browser == nil {
|
||||
return nil, fmt.Errorf("browser not running")
|
||||
}
|
||||
|
||||
return m.page, nil
|
||||
}
|
||||
|
||||
// Close 關閉瀏覽器
|
||||
func (m *BrowserManager) Close() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if !m.isRunning {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if m.browser != nil {
|
||||
err = m.browser.Close()
|
||||
m.browser = nil
|
||||
}
|
||||
|
||||
m.page = nil
|
||||
m.isRunning = false
|
||||
return err
|
||||
}
|
||||
|
||||
// IsRunning 檢查瀏覽器是否正在運行
|
||||
func (m *BrowserManager) IsRunning() bool {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.isRunning
|
||||
}
|
||||
|
||||
// SetCurrentModel 設定當前模型
|
||||
func (m *BrowserManager) SetCurrentModel(model string) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
m.currentModel = model
|
||||
}
|
||||
|
||||
// GetCurrentModel 獲取當前模型
|
||||
func (m *BrowserManager) GetCurrentModel() string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.currentModel
|
||||
}
|
||||
|
|
@ -7,308 +7,200 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
"github.com/go-rod/rod/lib/proto"
|
||||
)
|
||||
|
||||
const geminiURL = "https://gemini.google.com/app"
|
||||
|
||||
var modelSelectors = map[string]string{
|
||||
"gemini-2.0-flash": "Flash",
|
||||
"gemini-2.5-pro": "Pro",
|
||||
"gemini-2.5-pro-thinking": "Thinking",
|
||||
}
|
||||
|
||||
// NormalizeModel returns model with a "gemini-" prefix, adding the prefix
// only when model does not already start with it.
func NormalizeModel(model string) string {
	const prefix = "gemini-"

	if !strings.HasPrefix(model, prefix) {
		return prefix + model
	}
	return model
}
|
||||
|
||||
func GetModelDisplayName(model string) string {
|
||||
if name, ok := modelSelectors[model]; ok {
|
||||
return name
|
||||
}
|
||||
return "Flash"
|
||||
// 輸入框選擇器(依優先順序)
|
||||
var inputSelectors = []string{
|
||||
".ProseMirror",
|
||||
"rich-textarea",
|
||||
"div[role='textbox'][contenteditable='true']",
|
||||
"div[contenteditable='true']",
|
||||
"textarea",
|
||||
}
|
||||
|
||||
// NavigateToGemini 導航到 Gemini
|
||||
func NavigateToGemini(page *rod.Page) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := page.Context(ctx).Navigate(geminiURL); err != nil {
|
||||
return fmt.Errorf("failed to navigate to gemini: %w", err)
|
||||
return fmt.Errorf("failed to navigate: %w", err)
|
||||
}
|
||||
return page.Context(ctx).WaitLoad()
|
||||
}
|
||||
|
||||
// IsLoggedIn 檢查是否已登入
|
||||
func IsLoggedIn(page *rod.Page) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// 嘗試多種可能的登入狀態指示器
|
||||
selectors := []string{
|
||||
`textarea`,
|
||||
`[contenteditable="true"]`,
|
||||
`[aria-label*="chat" i]`,
|
||||
`button[aria-label*="new" i]`,
|
||||
}
|
||||
|
||||
for _, sel := range selectors {
|
||||
_, err := page.Context(ctx).Element(sel)
|
||||
if err == nil {
|
||||
for _, sel := range inputSelectors {
|
||||
if _, err := page.Context(ctx).Element(sel); err == nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// SelectModel 選擇模型(可選)
|
||||
func SelectModel(page *rod.Page, model string) error {
|
||||
displayName := GetModelDisplayName(model)
|
||||
fmt.Printf("[GeminiWeb] Model selection skipped (using current model)\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
// TypeInput 在輸入框中輸入文字
|
||||
func TypeInput(page *rod.Page, text string) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// 嘗試多種可能的模型選擇器選擇器
|
||||
selectors := []string{
|
||||
`button[aria-label*="model" i]`,
|
||||
`button[aria-label*="Model" i]`,
|
||||
`[data-test-id="model-selector"]`,
|
||||
`button[aria-haspopup="listbox"]`,
|
||||
`[class*="model-selector"]`,
|
||||
`[class*="model"] button`,
|
||||
}
|
||||
|
||||
var modelSwitcher *rod.Element
|
||||
// 1. 尋找輸入框
|
||||
var inputEl *rod.Element
|
||||
var err error
|
||||
for _, sel := range selectors {
|
||||
modelSwitcher, err = page.Context(ctx).Element(sel)
|
||||
for _, sel := range inputSelectors {
|
||||
inputEl, err = page.Context(ctx).Element(sel)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// 如果找不到模型選擇器,可能是頁面已經在正確的模型上,或是 Gemini 的 UI 不同
|
||||
fmt.Printf("Warning: model selector not found, using current model (requested: %s)\n", displayName)
|
||||
return nil
|
||||
return fmt.Errorf("input field not found")
|
||||
}
|
||||
|
||||
// 獲取目前的模型文字
|
||||
currentText, _ := modelSwitcher.Text()
|
||||
if currentText != "" && strings.Contains(strings.ToLower(currentText), strings.ToLower(displayName)) {
|
||||
// 已經在正確的模型上
|
||||
return nil
|
||||
// 2. Focus 輸入框
|
||||
if err := inputEl.Focus(); err != nil {
|
||||
return fmt.Errorf("failed to focus input: %w", err)
|
||||
}
|
||||
|
||||
if err := modelSwitcher.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||||
return fmt.Errorf("failed to click model selector: %w", err)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// 3. 使用 Input 方法(Rod 的正確方式)
|
||||
if err := inputEl.Input(text); err != nil {
|
||||
return fmt.Errorf("failed to input text: %w", err)
|
||||
}
|
||||
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// 嘗試多種可能的選項選擇器
|
||||
optionSelectors := []string{
|
||||
fmt.Sprintf(`[aria-label*="%s" i]`, displayName),
|
||||
fmt.Sprintf(`[data-value*="%s" i]`, displayName),
|
||||
fmt.Sprintf(`text=%s`, displayName),
|
||||
`[role="option"]`,
|
||||
}
|
||||
// 4. 觸發 Enter 觸發事件
|
||||
_ = inputEl.SelectAllText()
|
||||
_ = page.Keyboard.Press('\r') // Enter key
|
||||
|
||||
var option *rod.Element
|
||||
for _, sel := range optionSelectors {
|
||||
option, err = page.Context(ctx).Element(sel)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("Warning: model option %s not found, using current model\n", displayName)
|
||||
return nil
|
||||
}
|
||||
|
||||
return option.Click(proto.InputMouseButtonLeft, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
func SendPrompt(page *rod.Page, prompt string) error {
|
||||
fmt.Printf("[GeminiWeb] Finding input field...\n")
|
||||
|
||||
// 嘗試多種可能的輸入框選擇器
|
||||
selectors := []string{
|
||||
`textarea`,
|
||||
`[contenteditable="true"]`,
|
||||
`[role="textbox"]`,
|
||||
`div[contenteditable="true"]`,
|
||||
`div[role="textbox"]`,
|
||||
`.ql-editor`,
|
||||
`rich-textarea`,
|
||||
// ClickSend 發送訊息
|
||||
func ClickSend(page *rod.Page) error {
|
||||
// 方法 1: 按 Enter
|
||||
if err := page.Keyboard.Press('\r'); err != nil {
|
||||
return fmt.Errorf("failed to press Enter: %w", err)
|
||||
}
|
||||
|
||||
var textarea *rod.Element
|
||||
var err error
|
||||
for _, sel := range selectors {
|
||||
fmt.Printf(" Trying selector: %s\n", sel)
|
||||
textarea, err = page.Element(sel)
|
||||
if err == nil {
|
||||
fmt.Printf(" Found with: %s\n", sel)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("input field not found after trying all selectors: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
|
||||
if err := textarea.Input(prompt); err != nil {
|
||||
return fmt.Errorf("failed to input prompt: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("[GeminiWeb] Finding send button...\n")
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// 嘗試多種可能的發送按鈕選擇器
|
||||
btnSelectors := []string{
|
||||
`button[type="submit"]`,
|
||||
`button[aria-label*="Send" i]`,
|
||||
`button[aria-label*="submit" i]`,
|
||||
`button:has(svg)`,
|
||||
`button`,
|
||||
}
|
||||
|
||||
var sendBtn *rod.Element
|
||||
for _, sel := range btnSelectors {
|
||||
fmt.Printf(" Trying button selector: %s\n", sel)
|
||||
sendBtn, err = page.Element(sel)
|
||||
if err == nil {
|
||||
// 檢查是否是發送按鈕(不是其他按鈕)
|
||||
ariaLabel, _ := sendBtn.Attribute("aria-label")
|
||||
text, _ := sendBtn.Text()
|
||||
if ariaLabel != nil || text != "" {
|
||||
fmt.Printf(" Found button with aria-label=%v text=%s\n", ariaLabel, truncate(text, 20))
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
fmt.Printf(" Found send button with: %s\n", sel)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
// 嘗試按 Enter 鍵發送
|
||||
fmt.Printf("[GeminiWeb] No send button found, trying Enter key...\n")
|
||||
if err := page.Keyboard.Press('\n'); err != nil {
|
||||
return fmt.Errorf("failed to press Enter: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("[GeminiWeb] Clicking send button...\n")
|
||||
return sendBtn.Click(proto.InputMouseButtonLeft, 1)
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
return nil
|
||||
}
|
||||
|
||||
// truncate shortens s to at most max bytes and appends "..." when anything
// was cut off; strings that already fit are returned unchanged.
//
// The cut is moved back to the nearest UTF-8 character boundary so that a
// multi-byte character (for example CJK text) is never split, which would
// otherwise yield invalid UTF-8. A negative max is treated as 0 instead of
// panicking on the slice expression.
func truncate(s string, max int) string {
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}

	// cut < len(s) holds here, so s[cut] is in range. Bytes of the form
	// 10xxxxxx are UTF-8 continuation bytes: step back until the cut sits on
	// the first byte of a character.
	cut := max
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}

	return s[:cut] + "..."
}
|
||||
// WaitForReady 等待頁面空閒
|
||||
func WaitForReady(page *rod.Page) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel()
|
||||
|
||||
func WaitForResponse(page *rod.Page, onChunk func(text string), onThinking func(thinking string), onComplete func()) error {
|
||||
lastText := ""
|
||||
lastThinking := ""
|
||||
responseComplete := false
|
||||
|
||||
timeout := time.NewTimer(120 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
ticker := time.NewTicker(500 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
fmt.Println("[GeminiWeb] Checking if page is ready...")
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-timeout.C:
|
||||
return fmt.Errorf("response timeout")
|
||||
case <-ticker.C:
|
||||
textChanged := false
|
||||
case <-ctx.Done():
|
||||
fmt.Println("[GeminiWeb] Page ready check timeout, proceeding anyway")
|
||||
return nil
|
||||
default:
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
responseEls, err := page.Elements(`.response-text, message-content, .model-response, div[data-test-id="response"]`)
|
||||
if err == nil && len(responseEls) > 0 {
|
||||
for _, el := range responseEls {
|
||||
text, _ := el.Text()
|
||||
text = strings.TrimSpace(text)
|
||||
if text != "" && text != lastText {
|
||||
if strings.Contains(text, lastText) {
|
||||
newPart := strings.TrimPrefix(text, lastText)
|
||||
if newPart != "" {
|
||||
onChunk(newPart)
|
||||
}
|
||||
} else {
|
||||
onChunk(text)
|
||||
}
|
||||
lastText = text
|
||||
textChanged = true
|
||||
}
|
||||
// 檢查是否有停止按鈕
|
||||
hasStopBtn := false
|
||||
stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
|
||||
for _, btn := range stopBtns {
|
||||
visible, _ := btn.Visible()
|
||||
if visible {
|
||||
hasStopBtn = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
thinkingEls, err := page.Elements(`.thinking-content, .thought-text, div[data-test-id="thinking"]`)
|
||||
if err == nil && len(thinkingEls) > 0 {
|
||||
for _, el := range thinkingEls {
|
||||
thinking, _ := el.Text()
|
||||
thinking = strings.TrimSpace(thinking)
|
||||
if thinking != "" && thinking != lastThinking {
|
||||
if strings.Contains(thinking, lastThinking) {
|
||||
newPart := strings.TrimPrefix(thinking, lastThinking)
|
||||
if newPart != "" {
|
||||
onThinking(newPart)
|
||||
}
|
||||
} else {
|
||||
onThinking(thinking)
|
||||
}
|
||||
lastThinking = thinking
|
||||
textChanged = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
doneBtn, err := page.Element(`button[aria-label*="stop"], button[aria-label*="regenerate"]`)
|
||||
if err == nil && doneBtn != nil {
|
||||
ariaLabel, _ := doneBtn.Attribute("aria-label")
|
||||
if ariaLabel != nil && (*ariaLabel == "Stop" || strings.Contains(*ariaLabel, "regenerate")) {
|
||||
if !responseComplete && lastText != "" {
|
||||
responseComplete = true
|
||||
onComplete()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !textChanged && responseComplete {
|
||||
if !hasStopBtn {
|
||||
fmt.Println("[GeminiWeb] Page is ready")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func IsRateLimited(page *rod.Page) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
// ExtractResponse 提取回應文字
|
||||
func ExtractResponse(page *rod.Page) (string, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||
defer cancel()
|
||||
|
||||
el, err := page.Context(ctx).Element(`[class*="rate-limit"], [class*="quota"], [data-test-id="rate-limited"]`)
|
||||
return err == nil && el != nil
|
||||
}
|
||||
var lastText string
|
||||
lastUpdate := time.Now()
|
||||
|
||||
func GetRateLimitMessage(page *rod.Page) string {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
if lastText != "" {
|
||||
return lastText, nil
|
||||
}
|
||||
return "", fmt.Errorf("response timeout")
|
||||
default:
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
el, err := page.Context(ctx).Element(`[class*="rate-limit"], [class*="quota"], [class*="error-message"]`)
|
||||
if err != nil || el == nil {
|
||||
return ""
|
||||
// 尋找回應文字
|
||||
for _, sel := range responseSelectors {
|
||||
elements, err := page.Elements(sel)
|
||||
if err != nil || len(elements) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// 取得最後一個元素的文字
|
||||
lastEl := elements[len(elements)-1]
|
||||
text, err := lastEl.Text()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
text = strings.TrimSpace(text)
|
||||
if text != "" && text != lastText && len(text) > len(lastText) {
|
||||
lastText = text
|
||||
lastUpdate = time.Now()
|
||||
fmt.Printf("[GeminiWeb] Response length: %d\n", len(text))
|
||||
}
|
||||
}
|
||||
|
||||
// 檢查是否已完成(2 秒內沒有新內容)
|
||||
if time.Since(lastUpdate) > 2*time.Second && lastText != "" {
|
||||
// 最後檢查一次是否還有停止按鈕
|
||||
hasStopBtn := false
|
||||
stopBtns, _ := page.Elements("button[aria-label*='Stop'], button[aria-label*='停止']")
|
||||
for _, btn := range stopBtns {
|
||||
visible, _ := btn.Visible()
|
||||
if visible {
|
||||
hasStopBtn = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !hasStopBtn {
|
||||
return lastText, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
text, _ := el.Text()
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
|
||||
// 默認的回應選擇器
|
||||
var responseSelectors = []string{
|
||||
".model-response-text",
|
||||
".message-content",
|
||||
".markdown",
|
||||
".prose",
|
||||
"model-response",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,164 +6,139 @@ import (
|
|||
"cursor-api-proxy/internal/config"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
)
|
||||
|
||||
// Provider 使用持久化瀏覽器管理器
|
||||
type Provider struct {
|
||||
cfg config.BridgeConfig
|
||||
pool *SessionPool
|
||||
cfg config.BridgeConfig
|
||||
managerOnce sync.Once
|
||||
manager *BrowserManager
|
||||
managerErr error
|
||||
}
|
||||
|
||||
// NewProvider 建立新的 Provider
|
||||
func NewProvider(cfg config.BridgeConfig) *Provider {
|
||||
return &Provider{cfg: cfg}
|
||||
}
|
||||
|
||||
// getName 返回 Provider 名稱
|
||||
func (p *Provider) Name() string {
|
||||
return "gemini-web"
|
||||
}
|
||||
|
||||
// Close 關閉瀏覽器
|
||||
func (p *Provider) Close() error {
|
||||
if p.manager != nil {
|
||||
return p.manager.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Provider) initPool() error {
|
||||
if p.pool != nil {
|
||||
return nil
|
||||
}
|
||||
pool, err := NewSessionPool(p.cfg.GeminiAccountDir, p.cfg.GeminiMaxSessions)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init session pool: %w", err)
|
||||
}
|
||||
p.pool = pool
|
||||
return nil
|
||||
// getManager 獲取或初始化瀏覽器管理器(單例)
|
||||
func (p *Provider) getManager() (*BrowserManager, error) {
|
||||
p.managerOnce.Do(func() {
|
||||
sessionDir := p.getSessionDir()
|
||||
p.manager, p.managerErr = GetBrowserManager(sessionDir, p.cfg.GeminiBrowserVisible)
|
||||
})
|
||||
return p.manager, p.managerErr
|
||||
}
|
||||
|
||||
// getSessionDir 獲取 session 目錄
|
||||
func (p *Provider) getSessionDir() string {
|
||||
// 使用單一 session 目錄(簡化設計)
|
||||
return filepath.Join(p.cfg.GeminiAccountDir, "default-session")
|
||||
}
|
||||
|
||||
// Generate 生成回應
|
||||
func (p *Provider) Generate(ctx context.Context, model string, messages []apitypes.Message, tools []apitypes.Tool, cb func(apitypes.StreamChunk)) error {
|
||||
fmt.Printf("[GeminiWeb] Starting generation with model: %s\n", model)
|
||||
|
||||
if err := p.initPool(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 檢查是否有可用的已登入 session
|
||||
session := p.pool.GetAvailable()
|
||||
needLogin := false
|
||||
|
||||
if session == nil {
|
||||
// 沒有 session,建立一個新的
|
||||
fmt.Printf("[GeminiWeb] No existing session found, creating new session...\n")
|
||||
var err error
|
||||
session, err = p.pool.CreateSession(fmt.Sprintf("session-%d", time.Now().Unix()))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create session: %w", err)
|
||||
}
|
||||
needLogin = true
|
||||
fmt.Printf("[GeminiWeb] Created new session: %s\n", session.Name)
|
||||
} else {
|
||||
fmt.Printf("[GeminiWeb] Using existing session: %s\n", session.Name)
|
||||
}
|
||||
|
||||
p.pool.StartSession(session)
|
||||
defer p.pool.EndSession(session)
|
||||
|
||||
// 如果沒有登入過,強制使用可見瀏覽器
|
||||
visible := p.cfg.GeminiBrowserVisible || needLogin
|
||||
|
||||
browser, err := NewBrowser(visible)
|
||||
// 1. 獲取瀏覽器管理器
|
||||
manager, err := p.getManager()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create browser: %w", err)
|
||||
}
|
||||
defer browser.Close()
|
||||
|
||||
page, err := browser.NewPage()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create page: %w", err)
|
||||
return fmt.Errorf("failed to get browser manager: %w", err)
|
||||
}
|
||||
|
||||
// 嘗試載入 cookies
|
||||
if session.CookieFile != "" {
|
||||
fmt.Printf("[GeminiWeb] Loading cookies from: %s\n", session.CookieFile)
|
||||
cookies, err := LoadCookiesFromFile(session.CookieFile)
|
||||
if err == nil {
|
||||
if err := SetCookiesOnPage(page, cookies); err != nil {
|
||||
fmt.Printf("[GeminiWeb] Warning: failed to set cookies: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("[GeminiWeb] Loaded %d cookies\n", len(cookies))
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("[GeminiWeb] No existing cookies found\n")
|
||||
// 2. 啟動瀏覽器(如果尚未啟動)
|
||||
if !manager.IsRunning() {
|
||||
fmt.Printf("[GeminiWeb] Launching browser...\n")
|
||||
if err := manager.Launch(); err != nil {
|
||||
return fmt.Errorf("failed to launch browser: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("[GeminiWeb] Navigating to Gemini...\n")
|
||||
if err := NavigateToGemini(page); err != nil {
|
||||
return fmt.Errorf("failed to navigate: %w", err)
|
||||
// 3. 獲取頁面
|
||||
page, err := manager.GetPage()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get page: %w", err)
|
||||
}
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
// 4. 檢查當前 URL,如果不是 Gemini 則導航
|
||||
currentURL, _ := page.Info()
|
||||
if !strings.Contains(currentURL.URL, "gemini.google.com") {
|
||||
fmt.Printf("[GeminiWeb] Navigating to Gemini...\n")
|
||||
if err := NavigateToGemini(page); err != nil {
|
||||
return fmt.Errorf("failed to navigate: %w", err)
|
||||
}
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
|
||||
// 5. 檢查登入狀態
|
||||
fmt.Printf("[GeminiWeb] Checking login status...\n")
|
||||
if IsLoggedIn(page) {
|
||||
fmt.Printf("[GeminiWeb] Logged in (using saved cookies)\n")
|
||||
} else {
|
||||
fmt.Printf("[GeminiWeb] Not logged in - continuing without login\n")
|
||||
if !IsLoggedIn(page) {
|
||||
fmt.Printf("[GeminiWeb] Not logged in, continuing anyway\n")
|
||||
|
||||
if visible {
|
||||
// 如果瀏覽器可見,提示使用者可以登入,但繼續執行不等待
|
||||
if p.cfg.GeminiBrowserVisible {
|
||||
fmt.Println("\n========================================")
|
||||
fmt.Println("Browser is open. You can:")
|
||||
fmt.Println("1. Log in to Gemini now (to use your account)")
|
||||
fmt.Println("1. Log in to Gemini now")
|
||||
fmt.Println("2. Continue without login")
|
||||
fmt.Println("\nThe request will proceed without waiting.")
|
||||
fmt.Println("If you log in during this session, cookies will be saved automatically.")
|
||||
fmt.Println("========================================\n")
|
||||
|
||||
// 異步保存 cookies(如果使用者登入了)
|
||||
go func() {
|
||||
time.Sleep(30 * time.Second) // 給使用者 30 秒登入
|
||||
if IsLoggedIn(page) {
|
||||
cookies, err := GetPageCookies(page)
|
||||
if err == nil {
|
||||
SaveCookiesToFile(cookies, session.CookieFile)
|
||||
fmt.Printf("[GeminiWeb] Saved %d cookies for future use\n", len(cookies))
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("[GeminiWeb] Logged in\n")
|
||||
}
|
||||
|
||||
fmt.Printf("[GeminiWeb] Selecting model: %s\n", model)
|
||||
if err := SelectModel(page, model); err != nil {
|
||||
return fmt.Errorf("failed to select model: %w", err)
|
||||
// 6. 等待頁面就緒
|
||||
if err := WaitForReady(page); err != nil {
|
||||
fmt.Printf("[GeminiWeb] Warning: %v\n", err)
|
||||
}
|
||||
fmt.Printf("[GeminiWeb] Model selected\n")
|
||||
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// 7. 建構提示詞
|
||||
prompt := buildPromptFromMessages(messages)
|
||||
fmt.Printf("[GeminiWeb] Sending prompt (length: %d chars)\n", len(prompt))
|
||||
if err := SendPrompt(page, prompt); err != nil {
|
||||
return fmt.Errorf("failed to send prompt: %w", err)
|
||||
}
|
||||
fmt.Printf("[GeminiWeb] Prompt sent, waiting for response...\n")
|
||||
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
|
||||
|
||||
return WaitForResponse(page,
|
||||
func(text string) {
|
||||
cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: text})
|
||||
},
|
||||
func(thinking string) {
|
||||
cb(apitypes.StreamChunk{Type: apitypes.ChunkThinking, Thinking: thinking})
|
||||
},
|
||||
func() {
|
||||
cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})
|
||||
},
|
||||
)
|
||||
// 8. 輸入文字
|
||||
if err := TypeInput(page, prompt); err != nil {
|
||||
return fmt.Errorf("failed to type input: %w", err)
|
||||
}
|
||||
|
||||
// 9. 發送
|
||||
fmt.Printf("[GeminiWeb] Sending message...\n")
|
||||
if err := ClickSend(page); err != nil {
|
||||
return fmt.Errorf("failed to send: %w", err)
|
||||
}
|
||||
|
||||
// 10. 提取回應
|
||||
fmt.Printf("[GeminiWeb] Waiting for response...\n")
|
||||
response, err := ExtractResponse(page)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to extract response: %w", err)
|
||||
}
|
||||
|
||||
// 11. 串流回調
|
||||
cb(apitypes.StreamChunk{Type: apitypes.ChunkText, Text: response})
|
||||
cb(apitypes.StreamChunk{Type: apitypes.ChunkDone, Done: true})
|
||||
|
||||
fmt.Printf("[GeminiWeb] Response complete (%d chars)\n", len(response))
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildPromptFromMessages 從訊息列表建構提示詞
|
||||
func buildPromptFromMessages(messages []apitypes.Message) string {
|
||||
var prompt string
|
||||
for _, m := range messages {
|
||||
|
|
@ -179,74 +154,43 @@ func buildPromptFromMessages(messages []apitypes.Message) string {
|
|||
return prompt
|
||||
}
|
||||
|
||||
// RunLogin 執行登入流程(供 gemini-login 命令使用)
|
||||
func RunLogin(cfg config.BridgeConfig, sessionName string) error {
|
||||
if sessionName == "" {
|
||||
sessionName = fmt.Sprintf("session-%d", time.Now().Unix())
|
||||
sessionName = "default-session"
|
||||
}
|
||||
|
||||
pool, err := NewSessionPool(cfg.GeminiAccountDir, cfg.GeminiMaxSessions)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init pool: %w", err)
|
||||
}
|
||||
|
||||
session, err := pool.CreateSession(sessionName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create session: %w", err)
|
||||
sessionDir := filepath.Join(cfg.GeminiAccountDir, sessionName)
|
||||
if err := os.MkdirAll(sessionDir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create session dir: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Starting browser for login. Session: %s\n", sessionName)
|
||||
fmt.Printf("Session directory: %s\n", sessionDir)
|
||||
fmt.Println("Please log in to your Gemini account in the browser window.")
|
||||
fmt.Println("Press Ctrl+C when you have completed the login...")
|
||||
|
||||
browser, err := NewBrowser(true)
|
||||
manager, err := NewBrowserManager(sessionDir, true) // visible=true
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create browser: %w", err)
|
||||
return fmt.Errorf("failed to create browser manager: %w", err)
|
||||
}
|
||||
defer browser.Close()
|
||||
|
||||
page, err := browser.NewPage()
|
||||
if err := manager.Launch(); err != nil {
|
||||
return fmt.Errorf("failed to launch browser: %w", err)
|
||||
}
|
||||
defer manager.Close()
|
||||
|
||||
page, err := manager.GetPage()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create page: %w", err)
|
||||
return fmt.Errorf("failed to get page: %w", err)
|
||||
}
|
||||
|
||||
if err := NavigateToGemini(page); err != nil {
|
||||
return fmt.Errorf("failed to navigate: %w", err)
|
||||
}
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
|
||||
// 等待用戶手動登入...
|
||||
// 使用 Ctrl+C 退出,瀏覽器資料會自動保存在 userDataDir
|
||||
|
||||
<-sigChan
|
||||
|
||||
cookies, err := GetPageCookies(page)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
|
||||
if err := SaveCookiesToFile(cookies, session.CookieFile); err != nil {
|
||||
return fmt.Errorf("failed to save cookies: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Session saved successfully: %s\n", sessionName)
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetPageCookies(page *rod.Page) ([]Cookie, error) {
|
||||
cookies, err := page.Cookies([]string{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
|
||||
var result []Cookie
|
||||
for _, c := range cookies {
|
||||
result = append(result, Cookie{
|
||||
Name: c.Name,
|
||||
Value: c.Value,
|
||||
Domain: c.Domain,
|
||||
Path: c.Path,
|
||||
HTTPOnly: c.HTTPOnly,
|
||||
Secure: c.Secure,
|
||||
})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue