From bef521504e2e0d0cedc494e47e9b6edf42a3eb8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=80=A7=E9=A9=8A?= Date: Fri, 3 Apr 2026 01:31:37 +0800 Subject: [PATCH] feat: Add debug mode with screenshot and page analysis Features: - Wait for the user to press ENTER in visible mode (GEMINI_BROWSER_VISIBLE=true) - Save screenshot to /tmp/gemini-debug.png - Save HTML to /tmp/gemini-debug.html when the page-ready wait fails - JavaScript-based element detection - Multi-strategy wait (A: selector wait, B: fixed 3s delay, C: JavaScript DOM query, D: debug dump) - Detailed logging of each strategy attempt - Page structure dump to the browser console in debug mode This helps identify why the input field is not found. --- .../geminiweb/playwright_provider.go | 139 +++++++++++++++--- 1 file changed, 115 insertions(+), 24 deletions(-) diff --git a/internal/providers/geminiweb/playwright_provider.go b/internal/providers/geminiweb/playwright_provider.go index dd2a0dc..ef2317b 100644 --- a/internal/providers/geminiweb/playwright_provider.go +++ b/internal/providers/geminiweb/playwright_provider.go @@ -134,10 +134,46 @@ func (p *PlaywrightProvider) Generate(ctx context.Context, model string, message } } - // 3. 等待頁面完全載入(project-golem 策略) + // 3. 調試模式:如果可見,等待用戶確認 + if p.cfg.GeminiBrowserVisible { + fmt.Println("\n" + strings.Repeat("=", 60)) + fmt.Println("DEBUG MODE: Browser is visible") + fmt.Println("Please check the browser and press ENTER when ready...") + fmt.Println("If you see login page, please log in first") + fmt.Println(strings.Repeat("=", 60)) + + // 等待用戶按 Enter + var input string + fmt.Scanln(&input) + + // 截圖保存 + screenshotPath := "/tmp/gemini-debug.png" + if _, err := p.page.Screenshot(playwright.PageScreenshotOptions{ + Path: playwright.String(screenshotPath), + }); err == nil { + fmt.Printf("[GeminiWeb] Screenshot saved to: %s\n", screenshotPath) + } + + // 輸出頁面信息 + fmt.Printf("[GeminiWeb] Current URL: %s\n", p.page.URL()) + if title, err := p.page.Title(); err == nil { + fmt.Printf("[GeminiWeb] Page Title: %s\n", title) + } + } + + // 4.
等待頁面完全載入(project-golem 策略) fmt.Println("[GeminiWeb] Waiting for page to be ready...") if err := p.waitForPageReady(); err != nil { fmt.Printf("[GeminiWeb] Warning: %v\n", err) + + // 額外調試:輸出頁面 HTML 結構 + if p.cfg.GeminiBrowserVisible { + html, _ := p.page.Content() + debugPath := "/tmp/gemini-debug.html" + if err := os.WriteFile(debugPath, []byte(html), 0644); err == nil { + fmt.Printf("[GeminiWeb] HTML saved to: %s\n", debugPath) + } + } } // 4. 檢查登入狀態 @@ -211,46 +247,101 @@ func (p *PlaywrightProvider) waitForPageReady() error { Timeout: playwright.Float(5000), }) - // 2. 等待輸入框出現(關鍵!) + // 2. 嘗試多種等待策略 inputSelectors := []string{ ".ProseMirror", "rich-textarea", "div[role='textbox']", "div[contenteditable='true']", "textarea", + "input[type='text']", } - var lastErr error - for _, sel := range inputSelectors { - fmt.Printf(" Checking for: %s\n", sel) - locator := p.page.Locator(sel) - if err := locator.WaitFor(playwright.LocatorWaitForOptions{ - Timeout: playwright.Float(10000), - State: playwright.WaitForSelectorStateVisible, - }); err == nil { - fmt.Printf(" ✓ Input field found: %s\n", sel) - return nil - } else { - lastErr = err - } - } - - // 3. 
如果都找不到,給頁面更多時間 - fmt.Println("[GeminiWeb] Input not found immediately, waiting longer...") - time.Sleep(3 * time.Second) - - for _, sel := range inputSelectors { + // 策略 A: 等待任一輸入框出現 + for i, sel := range inputSelectors { + fmt.Printf(" [%d/%d] Waiting for: %s\n", i+1, len(inputSelectors), sel) locator := p.page.Locator(sel) if err := locator.WaitFor(playwright.LocatorWaitForOptions{ Timeout: playwright.Float(5000), State: playwright.WaitForSelectorStateVisible, }); err == nil { - fmt.Printf(" ✓ Input field found after wait: %s\n", sel) + fmt.Printf(" ✓ Input field found: %s\n", sel) return nil } } - return fmt.Errorf("input field not ready: %w", lastErr) + // 策略 B: 等待頁面完全載入 + fmt.Println("[GeminiWeb] Waiting for page load...") + time.Sleep(3 * time.Second) + + // 策略 C: 使用 JavaScript 檢查 + fmt.Println("[GeminiWeb] Checking with JavaScript...") + result, err := p.page.Evaluate(` + () => { + // 檢查所有可能的輸入元素 + const selectors = [ + '.ProseMirror', + 'rich-textarea', + 'div[role="textbox"]', + 'div[contenteditable="true"]', + 'textarea', + 'input[type="text"]' + ]; + + for (const sel of selectors) { + const el = document.querySelector(sel); + if (el) { + return { + found: true, + selector: sel, + tagName: el.tagName, + className: el.className, + visible: el.offsetParent !== null + }; + } + } + + return { found: false }; + } + `) + + if err == nil { + if m, ok := result.(map[string]interface{}); ok { + if found, _ := m["found"].(bool); found { + sel, _ := m["selector"].(string) + fmt.Printf(" ✓ JavaScript found: %s\n", sel) + return nil + } + } + } + + // 策略 D: 調試模式 - 輸出頁面結構 + if p.cfg.GeminiBrowserVisible { + fmt.Println("[GeminiWeb].dump: Page structure analysis") + _, _ = p.page.Evaluate(` + () => { + const allElements = document.querySelectorAll('*'); + const inputLike = []; + for (const el of allElements) { + if (el.contentEditable === 'true' || + el.role === 'textbox' || + el.tagName === 'TEXTAREA' || + el.tagName === 'INPUT') { + inputLike.push({ + tag: el.tagName, 
+ class: el.className, + id: el.id, + role: el.role, + contentEditable: el.contentEditable + }); + } + } + console.log('Input-like elements:', inputLike); + } + `) + } + + return fmt.Errorf("no input field found after all strategies") } // isLoggedIn 檢查是否已登入