166 lines
4.4 KiB
Go
166 lines
4.4 KiB
Go
package placement
|
||
|
||
import (
|
||
"bytes"
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"os"
|
||
"os/exec"
|
||
"path/filepath"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// CrawlerSearchFn runs Playwright keyword search with a logged-in browser session.
|
||
type CrawlerSearchFn func(ctx context.Context, member MemberContext, keyword string, limit int) ([]DiscoverPost, error)
|
||
|
||
// execCrawlerInput is the JSON request that RunExecCrawlerSearch encodes and
// feeds to the crawler CLI on stdin.
type execCrawlerInput struct {
	// StorageState carries the browser session handed to the CLI.
	// NOTE(review): presumably a Playwright storage-state value — confirm
	// against the CLI whether it is a path or inline JSON.
	StorageState string `json:"storage_state"`
	// Query is the keyword to search for.
	Query string `json:"query"`
	// Limit is the maximum number of posts requested.
	Limit int `json:"limit"`
}
|
||
|
||
// execCrawlerPost is one entry of the "posts" array decoded from the crawler
// CLI's JSON output. Keys are camelCase on the wire.
type execCrawlerPost struct {
	// Post content and identity.
	Text       string `json:"text"`
	Permalink  string `json:"permalink"`
	ExternalID string `json:"externalId"`

	// Author display name as reported by the CLI.
	AuthorName string `json:"authorName"`

	// Engagement counters for the post.
	LikeCount  int `json:"likeCount"`
	ReplyCount int `json:"replyCount"`

	// Author metadata.
	AuthorVerified bool `json:"authorVerified"`
	FollowerCount  int  `json:"followerCount"`
}
|
||
|
||
type execCrawlerOutput struct {
|
||
Posts []execCrawlerPost `json:"posts"`
|
||
}
|
||
|
||
// RunExecCrawlerSearch invokes the Node Playwright CLI (tsx) for keyword search.
|
||
func RunExecCrawlerSearch(ctx context.Context, storageState, keyword string, limit int) ([]DiscoverPost, error) {
|
||
keyword = strings.TrimSpace(keyword)
|
||
if keyword == "" {
|
||
return nil, nil
|
||
}
|
||
storageState = strings.TrimSpace(storageState)
|
||
if storageState == "" {
|
||
return nil, fmt.Errorf("找不到 Chrome session,請先到連線頁同步 Threads 登入態")
|
||
}
|
||
if limit <= 0 {
|
||
limit = 12
|
||
}
|
||
|
||
repoRoot, cliPath, err := resolveKeywordSearchCLI()
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
payload, err := json.Marshal(execCrawlerInput{
|
||
StorageState: storageState,
|
||
Query: keyword,
|
||
Limit: limit,
|
||
})
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
runCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
|
||
defer cancel()
|
||
|
||
cmd := exec.CommandContext(runCtx, "npx", "tsx", cliPath)
|
||
cmd.Dir = repoRoot
|
||
cmd.Stdin = bytes.NewReader(payload)
|
||
var stdout, stderr bytes.Buffer
|
||
cmd.Stdout = &stdout
|
||
cmd.Stderr = &stderr
|
||
if err := cmd.Run(); err != nil {
|
||
msg := strings.TrimSpace(stderr.String())
|
||
if msg == "" {
|
||
msg = err.Error()
|
||
}
|
||
return nil, fmt.Errorf("crawler search failed: %s", msg)
|
||
}
|
||
|
||
var out execCrawlerOutput
|
||
if err := json.Unmarshal(stdout.Bytes(), &out); err != nil {
|
||
return nil, fmt.Errorf("crawler search output parse failed: %w", err)
|
||
}
|
||
|
||
posts := make([]DiscoverPost, 0, len(out.Posts))
|
||
for _, item := range out.Posts {
|
||
text := strings.TrimSpace(item.Text)
|
||
if text == "" {
|
||
continue
|
||
}
|
||
author := strings.TrimSpace(item.AuthorName)
|
||
permalink := strings.TrimSpace(item.Permalink)
|
||
extID := strings.TrimSpace(item.ExternalID)
|
||
posts = append(posts, DiscoverPost{
|
||
Text: text,
|
||
Permalink: permalink,
|
||
ExternalID: extID,
|
||
Author: author,
|
||
AuthorVerified: item.AuthorVerified,
|
||
FollowerCount: item.FollowerCount,
|
||
LikeCount: item.LikeCount,
|
||
ReplyCount: item.ReplyCount,
|
||
Source: DiscoverCrawler,
|
||
})
|
||
}
|
||
return posts, nil
|
||
}
|
||
|
||
func resolveKeywordSearchCLI() (repoRoot, cliPath string, err error) {
|
||
if root := strings.TrimSpace(os.Getenv("HAIXUN_REPO_ROOT")); root != "" {
|
||
cli := filepath.Join(root, "haixun-backend", "worker", "threads-keyword-search-cli.ts")
|
||
if fileExists(cli) {
|
||
return root, cli, nil
|
||
}
|
||
}
|
||
|
||
cwd, err := os.Getwd()
|
||
if err != nil {
|
||
return "", "", fmt.Errorf("resolve crawler cli: %w", err)
|
||
}
|
||
dir := cwd
|
||
for i := 0; i < 6; i++ {
|
||
cli := filepath.Join(dir, "haixun-backend", "worker", "threads-keyword-search-cli.ts")
|
||
if fileExists(cli) {
|
||
return dir, cli, nil
|
||
}
|
||
cli = filepath.Join(dir, "worker", "threads-keyword-search-cli.ts")
|
||
if fileExists(cli) {
|
||
return dir, cli, nil
|
||
}
|
||
parent := filepath.Dir(dir)
|
||
if parent == dir {
|
||
break
|
||
}
|
||
dir = parent
|
||
}
|
||
return "", "", fmt.Errorf("找不到 threads-keyword-search-cli.ts,請設定 HAIXUN_REPO_ROOT")
|
||
}
|
||
|
||
// fileExists reports whether path can be stat'ed and does not refer to a
// directory. Any os.Stat failure, including "not found", yields false.
func fileExists(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
|
||
|
||
// CrawlerKeywordFromQuery derives the plain keyword handed to the crawler.
//
// A non-blank keyword wins and is returned with surrounding whitespace removed.
// Otherwise the keyword is recovered from a Brave-style query string by, in
// order: trimming whitespace, dropping a leading "site:threads.net " filter,
// cutting everything from " after:" onward, and cutting everything from
// " 請問" onward. Surrounding double quotes are stripped before and after each
// cut.
func CrawlerKeywordFromQuery(query, keyword string) string {
	if trimmed := strings.TrimSpace(keyword); trimmed != "" {
		return trimmed
	}

	// unquote strips any run of double quotes from both ends.
	unquote := func(s string) string {
		return strings.Trim(s, `"`)
	}
	// cutBefore drops marker and everything after it, then trims whitespace.
	// A marker at index 0 (or not present at all) leaves s untouched.
	cutBefore := func(s, marker string) string {
		pos := strings.Index(s, marker)
		if pos <= 0 {
			return s
		}
		return strings.TrimSpace(s[:pos])
	}

	result := strings.TrimPrefix(strings.TrimSpace(query), "site:threads.net ")
	result = unquote(result)
	result = unquote(cutBefore(result, " after:"))
	result = unquote(cutBefore(result, " 請問"))
	return result
}
|