feat: Implement Gemini Web provider with model selection
- Fix handler to use PlaywrightProvider instead of the old Provider
- Add model selection for Gemini Web (fast/thinking/pro)
- Improve response extraction to remove the 'Gemini said' prefix
- Add progress logging for headless mode
- Fix navigation to wait for Domcontentloaded instead of Networkidle
- Add proper input field selectors (.ql-editor)
- Improve response completion detection with a stability check
This commit is contained in:
parent
3fec6e55eb
commit
4517b07f33
|
|
@ -59,8 +59,15 @@ func HandleGeminiChatCompletions(w http.ResponseWriter, r *http.Request, cfg con
|
|||
logger.LogRequestStart(method, pathname, rawModel, cfg.TimeoutMs, isStream)
|
||||
start := time.Now().UnixMilli()
|
||||
|
||||
// 創建 Gemini provider
|
||||
provider := geminiweb.NewProvider(cfg)
|
||||
// 創建 Gemini provider (使用 Playwright)
|
||||
provider, provErr := geminiweb.NewPlaywrightProvider(cfg)
|
||||
if provErr != nil {
|
||||
logger.LogAgentError(cfg.SessionsLogPath, method, pathname, remoteAddress, -1, provErr.Error())
|
||||
httputil.WriteJSON(w, 500, map[string]interface{}{
|
||||
"error": map[string]string{"message": provErr.Error(), "code": "provider_error"},
|
||||
}, nil)
|
||||
return
|
||||
}
|
||||
|
||||
if isStream {
|
||||
httputil.WriteSSEHeaders(w, nil)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ func NewProvider(cfg config.BridgeConfig) (Provider, error) {
|
|||
case "cursor":
|
||||
return cursor.NewProvider(cfg), nil
|
||||
case "gemini-web":
|
||||
// 使用新的 Playwright provider
|
||||
return geminiweb.NewPlaywrightProvider(cfg)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown provider: %s", providerType)
|
||||
|
|
|
|||
|
|
@ -135,38 +135,26 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message
|
|||
if !strings.Contains(currentURL, "gemini.google.com") {
|
||||
fmt.Println("[GeminiWeb] Navigating to Gemini...")
|
||||
if _, err := p.page.Goto("https://gemini.google.com/app", playwright.PageGotoOptions{
|
||||
WaitUntil: playwright.WaitUntilStateNetworkidle,
|
||||
Timeout: playwright.Float(30000),
|
||||
WaitUntil: playwright.WaitUntilStateDomcontentloaded,
|
||||
Timeout: playwright.Float(60000),
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to navigate: %w", err)
|
||||
}
|
||||
// 額外等待 JavaScript 載入
|
||||
fmt.Println("[GeminiWeb] Waiting for page to initialize...")
|
||||
time.Sleep(3 * time.Second)
|
||||
}
|
||||
|
||||
// 3. 調試模式:如果可見,等待用戶確認
|
||||
// 3. 調試模式:等待用戶確認
|
||||
if p.cfg.GeminiBrowserVisible {
|
||||
fmt.Println("\n" + strings.Repeat("=", 60))
|
||||
fmt.Println("DEBUG MODE: Browser is visible")
|
||||
fmt.Println("Please check the browser and press ENTER when ready...")
|
||||
fmt.Println("If you see login page, please log in first")
|
||||
fmt.Println(strings.Repeat("=", 60))
|
||||
fmt.Println("\n" + strings.Repeat("=", 70))
|
||||
fmt.Println("🔍 調試模式:瀏覽器已開啟")
|
||||
fmt.Println("請檢查瀏覽器畫面,然後按 ENTER 繼續...")
|
||||
fmt.Println("如果有問題,請查看: /tmp/gemini-debug.*")
|
||||
fmt.Println(strings.Repeat("=", 70))
|
||||
|
||||
// 等待用戶按 Enter
|
||||
var input string
|
||||
fmt.Scanln(&input)
|
||||
|
||||
// 截圖保存
|
||||
screenshotPath := "/tmp/gemini-debug.png"
|
||||
if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{
|
||||
Path: playwright.String(screenshotPath),
|
||||
}); err == nil {
|
||||
fmt.Printf("[GeminiWeb] Screenshot saved to: %s\n", screenshotPath)
|
||||
}
|
||||
|
||||
// 輸出頁面信息
|
||||
fmt.Printf("[GeminiWeb] Current URL: %s\n", p.page.URL())
|
||||
if title, err := p.page.Title(); err == nil {
|
||||
fmt.Printf("[GeminiWeb] Page Title: %s\n", title)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 等待頁面完全載入(project-golem 策略)
|
||||
|
|
@ -197,14 +185,19 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message
|
|||
fmt.Println("========================================\n")
|
||||
}
|
||||
} else {
|
||||
fmt.Println("[GeminiWeb] Logged in")
|
||||
fmt.Println("[GeminiWeb] ✓ Logged in")
|
||||
}
|
||||
|
||||
// 5. 建構提示詞
|
||||
// 5. 選擇模型(如果支援)
|
||||
if err := p.selectModel(model); err != nil {
|
||||
fmt.Printf("[GeminiWeb] Warning: model selection failed: %v\n", err)
|
||||
}
|
||||
|
||||
// 6. 建構提示詞
|
||||
prompt := buildPromptFromMessagesPlaywright(messages)
|
||||
fmt.Printf("[GeminiWeb] Typing prompt (%d chars)...\n", len(prompt))
|
||||
|
||||
// 6. 輸入文字(使用 Playwright 的 Auto-wait)
|
||||
// 7. 輸入文字(使用 Playwright 的 Auto-wait)
|
||||
if err := p.typeInput(prompt); err != nil {
|
||||
return fmt.Errorf("failed to type: %w", err)
|
||||
}
|
||||
|
|
@ -287,12 +280,13 @@ func (p *PlaywrightProvider) waitForPageReady() error {
|
|||
|
||||
// 2. 嘗試多種等待策略
|
||||
inputSelectors := []string{
|
||||
".ql-editor.ql-blank",
|
||||
".ql-editor",
|
||||
"div[contenteditable='true'][role='textbox']",
|
||||
"div[contenteditable='true']",
|
||||
".ProseMirror",
|
||||
"rich-textarea",
|
||||
"div[role='textbox']",
|
||||
"div[contenteditable='true']",
|
||||
"textarea",
|
||||
"input[type='text']",
|
||||
}
|
||||
|
||||
// 策略 A: 等待任一輸入框出現
|
||||
|
|
@ -318,12 +312,13 @@ func (p *PlaywrightProvider) waitForPageReady() error {
|
|||
() => {
|
||||
// 檢查所有可能的輸入元素
|
||||
const selectors = [
|
||||
'.ql-editor.ql-blank',
|
||||
'.ql-editor',
|
||||
'div[contenteditable="true"][role="textbox"]',
|
||||
'div[contenteditable="true"]',
|
||||
'.ProseMirror',
|
||||
'rich-textarea',
|
||||
'div[role="textbox"]',
|
||||
'div[contenteditable="true"]',
|
||||
'textarea',
|
||||
'input[type="text"]'
|
||||
'textarea'
|
||||
];
|
||||
|
||||
for (const sel of selectors) {
|
||||
|
|
@ -407,10 +402,12 @@ func (p *PlaywrightProvider) typeInput(text string) error {
|
|||
fmt.Println("[GeminiWeb] Looking for input field...")
|
||||
|
||||
selectors := []string{
|
||||
".ql-editor.ql-blank",
|
||||
".ql-editor",
|
||||
"div[contenteditable='true'][role='textbox']",
|
||||
"div[contenteditable='true']",
|
||||
".ProseMirror",
|
||||
"rich-textarea",
|
||||
"div[role='textbox'][contenteditable='true']",
|
||||
"div[contenteditable='true']",
|
||||
"textarea",
|
||||
}
|
||||
|
||||
|
|
@ -485,15 +482,8 @@ func (p *PlaywrightProvider) sendMessage() error {
|
|||
|
||||
// extractResponse 提取回應
|
||||
func (p *PlaywrightProvider) extractResponse() (string, error) {
|
||||
selectors := []string{
|
||||
".model-response-text",
|
||||
".message-content",
|
||||
".markdown",
|
||||
".prose",
|
||||
"model-response",
|
||||
}
|
||||
|
||||
var lastText string
|
||||
var stableCount int
|
||||
lastUpdate := time.Now()
|
||||
timeout := 120 * time.Second
|
||||
startTime := time.Now()
|
||||
|
|
@ -501,46 +491,139 @@ func (p *PlaywrightProvider) extractResponse() (string, error) {
|
|||
for time.Since(startTime) < timeout {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// 嘗試所有選擇器
|
||||
for _, sel := range selectors {
|
||||
locator := p.page.Locator(sel)
|
||||
count, _ := locator.Count()
|
||||
|
||||
if count > 0 {
|
||||
// 取最後一個元素
|
||||
lastEl := locator.Last()
|
||||
text, err := lastEl.TextContent()
|
||||
if err != nil {
|
||||
continue
|
||||
// 使用 JavaScript 提取回應文字(更精確)
|
||||
result, err := p.page.Evaluate(`
|
||||
() => {
|
||||
// 尋找所有可能的回應容器
|
||||
const selectors = [
|
||||
'model-response',
|
||||
'.model-response',
|
||||
'message-content',
|
||||
'.message-content'
|
||||
];
|
||||
|
||||
for (const sel of selectors) {
|
||||
const el = document.querySelector(sel);
|
||||
if (el) {
|
||||
// 嘗試找markdown內容
|
||||
const markdown = el.querySelector('.markdown, .prose, [class*="markdown"]');
|
||||
if (markdown && markdown.innerText.trim()) {
|
||||
let text = markdown.innerText.trim();
|
||||
// 移除常見的標籤前綴
|
||||
text = text.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim();
|
||||
return { text: text, source: sel + ' .markdown' };
|
||||
}
|
||||
|
||||
// 嘗試找純文字內容(排除標籤)
|
||||
let textContent = el.innerText.trim();
|
||||
if (textContent) {
|
||||
// 移除常見的標籤前綴
|
||||
textContent = textContent.replace(/^Gemini said\s*\n*/i, '').replace(/^Gemini\s*[::]\s*\n*/i, '').trim();
|
||||
return { text: textContent, source: sel };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { text: '', source: 'none' };
|
||||
}
|
||||
`)
|
||||
|
||||
if err == nil {
|
||||
if m, ok := result.(map[string]interface{}); ok {
|
||||
text, _ := m["text"].(string)
|
||||
text = strings.TrimSpace(text)
|
||||
|
||||
if text != "" && len(text) > len(lastText) {
|
||||
lastText = text
|
||||
lastUpdate = time.Now()
|
||||
fmt.Printf("[GeminiWeb] Response length: %d\n", len(text))
|
||||
stableCount = 0
|
||||
fmt.Printf("[GeminiWeb] Response: %d chars\n", len(text))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 檢查是否完成(2秒內無新內容)
|
||||
if time.Since(lastUpdate) > 2*time.Second && lastText != "" {
|
||||
// 最終檢查:停止按鈕是否還存在
|
||||
stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止']")
|
||||
count, _ := stopBtn.Count()
|
||||
// 檢查是否完成(需要連續 3 次穩定)
|
||||
if time.Since(lastUpdate) > 500*time.Millisecond && lastText != "" {
|
||||
stableCount++
|
||||
if stableCount >= 3 {
|
||||
// 最終檢查:停止按鈕是否還存在
|
||||
stopBtn := p.page.Locator("button[aria-label*='Stop'], button[aria-label*='停止'], button[data-test-id='stop-button']")
|
||||
count, _ := stopBtn.Count()
|
||||
|
||||
if count == 0 {
|
||||
return lastText, nil
|
||||
if count == 0 {
|
||||
fmt.Println("[GeminiWeb] ✓ Response complete")
|
||||
return lastText, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lastText != "" {
|
||||
fmt.Println("[GeminiWeb] ✓ Response complete (timeout)")
|
||||
return lastText, nil
|
||||
}
|
||||
return "", fmt.Errorf("response timeout")
|
||||
}
|
||||
|
||||
// selectModel 選擇 Gemini 模型
|
||||
// Gemini Web 只有三種模型:fast, thinking, pro
|
||||
func (p *PlaywrightProvider) selectModel(model string) error {
|
||||
// 映射模型名稱到 Gemini Web 的模型選擇器
|
||||
modelMap := map[string]string{
|
||||
"fast": "Fast",
|
||||
"thinking": "Thinking",
|
||||
"pro": "Pro",
|
||||
"gemini-fast": "Fast",
|
||||
"gemini-thinking": "Thinking",
|
||||
"gemini-pro": "Pro",
|
||||
"gemini-2.0-fast": "Fast",
|
||||
"gemini-2.0-flash": "Fast", // 相容舊名稱
|
||||
"gemini-2.5-pro": "Pro",
|
||||
"gemini-2.5-pro-thinking": "Thinking",
|
||||
}
|
||||
|
||||
// 從完整模型名稱中提取類型
|
||||
modelType := ""
|
||||
modelLower := strings.ToLower(model)
|
||||
for key, value := range modelMap {
|
||||
if strings.Contains(modelLower, strings.ToLower(key)) || modelLower == strings.ToLower(key) {
|
||||
modelType = value
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if modelType == "" {
|
||||
// 默認使用 Fast
|
||||
fmt.Printf("[GeminiWeb] Unknown model '%s', defaulting to Fast\n", model)
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("[GeminiWeb] Selecting model: %s\n", modelType)
|
||||
|
||||
// 點擊模型選擇器
|
||||
modelSelector := p.page.Locator("button[aria-label*='Model'], button[aria-label*='模型'], [data-test-id='model-selector']")
|
||||
if count, _ := modelSelector.Count(); count > 0 {
|
||||
if err := modelSelector.First().Click(); err != nil {
|
||||
fmt.Printf("[GeminiWeb] Warning: could not click model selector: %v\n", err)
|
||||
} else {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// 選擇對應的模型選項
|
||||
optionSelector := p.page.Locator(fmt.Sprintf("button:has-text('%s'), [role='menuitem']:has-text('%s')", modelType, modelType))
|
||||
if count, _ := optionSelector.Count(); count > 0 {
|
||||
if err := optionSelector.First().Click(); err != nil {
|
||||
fmt.Printf("[GeminiWeb] Warning: could not select model: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("[GeminiWeb] ✓ Model selected: %s\n", modelType)
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildPromptFromMessages 從訊息列表建構提示詞
|
||||
func buildPromptFromMessagesPlaywright(messages []apitypes.Message) string {
|
||||
var prompt string
|
||||
|
|
|
|||
Loading…
Reference in New Issue