package placement import ( "bytes" "context" "encoding/json" "fmt" "os" "os/exec" "path/filepath" "strings" "time" ) // CrawlerSearchFn runs Playwright keyword search with a logged-in browser session. type CrawlerSearchFn func(ctx context.Context, member MemberContext, keyword string, limit int) ([]DiscoverPost, error) type execCrawlerInput struct { StorageState string `json:"storage_state"` Query string `json:"query"` Limit int `json:"limit"` } type execCrawlerPost struct { Text string `json:"text"` Permalink string `json:"permalink"` ExternalID string `json:"externalId"` AuthorName string `json:"authorName"` LikeCount int `json:"likeCount"` ReplyCount int `json:"replyCount"` AuthorVerified bool `json:"authorVerified"` FollowerCount int `json:"followerCount"` } type execCrawlerOutput struct { Posts []execCrawlerPost `json:"posts"` } // RunExecCrawlerSearch invokes the Node Playwright CLI (tsx) for keyword search. func RunExecCrawlerSearch(ctx context.Context, storageState, keyword string, limit int) ([]DiscoverPost, error) { keyword = strings.TrimSpace(keyword) if keyword == "" { return nil, nil } storageState = strings.TrimSpace(storageState) if storageState == "" { return nil, fmt.Errorf("找不到 Chrome session,請先到連線頁同步 Threads 登入態") } if limit <= 0 { limit = 12 } repoRoot, cliPath, err := resolveKeywordSearchCLI() if err != nil { return nil, err } payload, err := json.Marshal(execCrawlerInput{ StorageState: storageState, Query: keyword, Limit: limit, }) if err != nil { return nil, err } runCtx, cancel := context.WithTimeout(ctx, 3*time.Minute) defer cancel() cmd := exec.CommandContext(runCtx, "npx", "tsx", cliPath) cmd.Dir = repoRoot cmd.Stdin = bytes.NewReader(payload) var stdout, stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { msg := strings.TrimSpace(stderr.String()) if msg == "" { msg = err.Error() } return nil, fmt.Errorf("crawler search failed: %s", msg) } var out execCrawlerOutput if err := 
json.Unmarshal(stdout.Bytes(), &out); err != nil { return nil, fmt.Errorf("crawler search output parse failed: %w", err) } posts := make([]DiscoverPost, 0, len(out.Posts)) for _, item := range out.Posts { text := strings.TrimSpace(item.Text) if text == "" { continue } author := strings.TrimSpace(item.AuthorName) permalink := strings.TrimSpace(item.Permalink) extID := strings.TrimSpace(item.ExternalID) posts = append(posts, DiscoverPost{ Text: text, Permalink: permalink, ExternalID: extID, Author: author, AuthorVerified: item.AuthorVerified, FollowerCount: item.FollowerCount, LikeCount: item.LikeCount, ReplyCount: item.ReplyCount, Source: DiscoverCrawler, }) } return posts, nil } func resolveKeywordSearchCLI() (repoRoot, cliPath string, err error) { if root := strings.TrimSpace(os.Getenv("HAIXUN_REPO_ROOT")); root != "" { cli := filepath.Join(root, "haixun-backend", "worker", "threads-keyword-search-cli.ts") if fileExists(cli) { return root, cli, nil } } cwd, err := os.Getwd() if err != nil { return "", "", fmt.Errorf("resolve crawler cli: %w", err) } dir := cwd for i := 0; i < 6; i++ { cli := filepath.Join(dir, "haixun-backend", "worker", "threads-keyword-search-cli.ts") if fileExists(cli) { return dir, cli, nil } cli = filepath.Join(dir, "worker", "threads-keyword-search-cli.ts") if fileExists(cli) { return dir, cli, nil } parent := filepath.Dir(dir) if parent == dir { break } dir = parent } return "", "", fmt.Errorf("找不到 threads-keyword-search-cli.ts,請設定 HAIXUN_REPO_ROOT") } func fileExists(path string) bool { info, err := os.Stat(path) return err == nil && !info.IsDir() } // CrawlerKeywordFromQuery extracts plain keyword from Brave-style query strings. 
//
// A non-blank keyword wins and is returned whitespace-trimmed. Otherwise the
// query is reduced step by step: a leading "site:threads.net " operator is
// dropped, everything from " after:" onward is cut, then everything from
// " 請問" onward is cut. Surrounding whitespace and double quotes are stripped
// after each step, so the result never carries a stray leading/trailing space
// or quote left over from the removed parts.
func CrawlerKeywordFromQuery(query, keyword string) string {
	if k := strings.TrimSpace(keyword); k != "" {
		return k
	}

	// unquote strips surrounding whitespace, then surrounding double quotes,
	// then any whitespace that was sitting just inside those quotes.
	unquote := func(s string) string {
		return strings.TrimSpace(strings.Trim(strings.TrimSpace(s), `"`))
	}

	q := unquote(strings.TrimPrefix(strings.TrimSpace(query), "site:threads.net "))
	for _, marker := range []string{" after:", " 請問"} {
		// idx > 0: only cut when there is something in front of the marker.
		if idx := strings.Index(q, marker); idx > 0 {
			q = unquote(q[:idx])
		}
	}
	return q
}