166 lines
4.4 KiB
Go
166 lines
4.4 KiB
Go
|
|
package placement
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"bytes"
|
|||
|
|
"context"
|
|||
|
|
"encoding/json"
|
|||
|
|
"fmt"
|
|||
|
|
"os"
|
|||
|
|
"os/exec"
|
|||
|
|
"path/filepath"
|
|||
|
|
"strings"
|
|||
|
|
"time"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// CrawlerSearchFn runs Playwright keyword search with a logged-in browser session.
|
|||
|
|
type CrawlerSearchFn func(ctx context.Context, member MemberContext, keyword string, limit int) ([]DiscoverPost, error)
|
|||
|
|
|
|||
|
|
// execCrawlerInput is the JSON request that RunExecCrawlerSearch writes to
// the stdin of the keyword-search CLI.
type execCrawlerInput struct {
	// StorageState is the browser session value passed through to the CLI.
	StorageState string `json:"storage_state"`
	// Query is the keyword to search for.
	Query string `json:"query"`
	// Limit is the maximum number of posts requested.
	Limit int `json:"limit"`
}
|
|||
|
|
|
|||
|
|
// execCrawlerPost is a single post entry as emitted by the keyword-search CLI
// in its JSON output. Keys are camelCase on the wire.
type execCrawlerPost struct {
	// Text is the post body; RunExecCrawlerSearch skips entries where it is blank.
	Text string `json:"text"`
	// Permalink is the link to the post.
	Permalink string `json:"permalink"`
	// ExternalID is the post identifier reported by the CLI.
	ExternalID string `json:"externalId"`
	// AuthorName is the display name of the post's author.
	AuthorName string `json:"authorName"`
	// LikeCount is the number of likes reported for the post.
	LikeCount int `json:"likeCount"`
	// ReplyCount is the number of replies reported for the post.
	ReplyCount int `json:"replyCount"`
	// AuthorVerified reports whether the CLI marked the author as verified.
	AuthorVerified bool `json:"authorVerified"`
	// FollowerCount is the author's follower count as reported by the CLI.
	FollowerCount int `json:"followerCount"`
}
|
|||
|
|
|
|||
|
|
type execCrawlerOutput struct {
|
|||
|
|
Posts []execCrawlerPost `json:"posts"`
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// RunExecCrawlerSearch invokes the Node Playwright CLI (tsx) for keyword search.
|
|||
|
|
func RunExecCrawlerSearch(ctx context.Context, storageState, keyword string, limit int) ([]DiscoverPost, error) {
|
|||
|
|
keyword = strings.TrimSpace(keyword)
|
|||
|
|
if keyword == "" {
|
|||
|
|
return nil, nil
|
|||
|
|
}
|
|||
|
|
storageState = strings.TrimSpace(storageState)
|
|||
|
|
if storageState == "" {
|
|||
|
|
return nil, fmt.Errorf("找不到 Chrome session,請先到連線頁同步 Threads 登入態")
|
|||
|
|
}
|
|||
|
|
if limit <= 0 {
|
|||
|
|
limit = 12
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
repoRoot, cliPath, err := resolveKeywordSearchCLI()
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
payload, err := json.Marshal(execCrawlerInput{
|
|||
|
|
StorageState: storageState,
|
|||
|
|
Query: keyword,
|
|||
|
|
Limit: limit,
|
|||
|
|
})
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
runCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
|
|||
|
|
defer cancel()
|
|||
|
|
|
|||
|
|
cmd := exec.CommandContext(runCtx, "npx", "tsx", cliPath)
|
|||
|
|
cmd.Dir = repoRoot
|
|||
|
|
cmd.Stdin = bytes.NewReader(payload)
|
|||
|
|
var stdout, stderr bytes.Buffer
|
|||
|
|
cmd.Stdout = &stdout
|
|||
|
|
cmd.Stderr = &stderr
|
|||
|
|
if err := cmd.Run(); err != nil {
|
|||
|
|
msg := strings.TrimSpace(stderr.String())
|
|||
|
|
if msg == "" {
|
|||
|
|
msg = err.Error()
|
|||
|
|
}
|
|||
|
|
return nil, fmt.Errorf("crawler search failed: %s", msg)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
var out execCrawlerOutput
|
|||
|
|
if err := json.Unmarshal(stdout.Bytes(), &out); err != nil {
|
|||
|
|
return nil, fmt.Errorf("crawler search output parse failed: %w", err)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
posts := make([]DiscoverPost, 0, len(out.Posts))
|
|||
|
|
for _, item := range out.Posts {
|
|||
|
|
text := strings.TrimSpace(item.Text)
|
|||
|
|
if text == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
author := strings.TrimSpace(item.AuthorName)
|
|||
|
|
permalink := strings.TrimSpace(item.Permalink)
|
|||
|
|
extID := strings.TrimSpace(item.ExternalID)
|
|||
|
|
posts = append(posts, DiscoverPost{
|
|||
|
|
Text: text,
|
|||
|
|
Permalink: permalink,
|
|||
|
|
ExternalID: extID,
|
|||
|
|
Author: author,
|
|||
|
|
AuthorVerified: item.AuthorVerified,
|
|||
|
|
FollowerCount: item.FollowerCount,
|
|||
|
|
LikeCount: item.LikeCount,
|
|||
|
|
ReplyCount: item.ReplyCount,
|
|||
|
|
Source: DiscoverCrawler,
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
return posts, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func resolveKeywordSearchCLI() (repoRoot, cliPath string, err error) {
|
|||
|
|
if root := strings.TrimSpace(os.Getenv("HAIXUN_REPO_ROOT")); root != "" {
|
|||
|
|
cli := filepath.Join(root, "haixun-backend", "worker", "threads-keyword-search-cli.ts")
|
|||
|
|
if fileExists(cli) {
|
|||
|
|
return root, cli, nil
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
cwd, err := os.Getwd()
|
|||
|
|
if err != nil {
|
|||
|
|
return "", "", fmt.Errorf("resolve crawler cli: %w", err)
|
|||
|
|
}
|
|||
|
|
dir := cwd
|
|||
|
|
for i := 0; i < 6; i++ {
|
|||
|
|
cli := filepath.Join(dir, "haixun-backend", "worker", "threads-keyword-search-cli.ts")
|
|||
|
|
if fileExists(cli) {
|
|||
|
|
return dir, cli, nil
|
|||
|
|
}
|
|||
|
|
cli = filepath.Join(dir, "worker", "threads-keyword-search-cli.ts")
|
|||
|
|
if fileExists(cli) {
|
|||
|
|
return dir, cli, nil
|
|||
|
|
}
|
|||
|
|
parent := filepath.Dir(dir)
|
|||
|
|
if parent == dir {
|
|||
|
|
break
|
|||
|
|
}
|
|||
|
|
dir = parent
|
|||
|
|
}
|
|||
|
|
return "", "", fmt.Errorf("找不到 threads-keyword-search-cli.ts,請設定 HAIXUN_REPO_ROOT")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// fileExists reports whether path can be stat'ed and does not refer to a
// directory. Any os.Stat error (including "not found") yields false.
func fileExists(path string) bool {
	stat, statErr := os.Stat(path)
	if statErr != nil {
		return false
	}
	return !stat.IsDir()
}
|
|||
|
|
|
|||
|
|
// CrawlerKeywordFromQuery derives the plain search keyword for the crawler.
//
// If keyword is non-blank it is returned trimmed and query is ignored.
// Otherwise the keyword is extracted from a Brave-style query string:
//   - a leading "site:threads.net " prefix is removed,
//   - everything from the first " after:" marker onward is dropped,
//   - everything from the first " 請問" marker onward is dropped,
//   - surrounding double quotes and whitespace are stripped after each step.
//
// Quotes and whitespace are stripped together until the value is stable, so
// inputs with extra spacing (for example two spaces after the site prefix, or
// a space just inside the closing quote) do not leak stray quotes or padding
// into the result.
func CrawlerKeywordFromQuery(query, keyword string) string {
	if k := strings.TrimSpace(keyword); k != "" {
		return k
	}

	// strip removes any mix of surrounding whitespace and double quotes.
	// Each pass that changes s shortens it, so the loop terminates.
	strip := func(s string) string {
		for {
			t := strings.Trim(strings.TrimSpace(s), `"`)
			if t == s {
				return s
			}
			s = t
		}
	}
	// cutAt drops marker and everything after it. A marker at index 0 is
	// ignored so the whole value is never discarded.
	cutAt := func(s, marker string) string {
		if idx := strings.Index(s, marker); idx > 0 {
			return strip(s[:idx])
		}
		return s
	}

	q := strings.TrimPrefix(strings.TrimSpace(query), "site:threads.net ")
	q = strip(q)
	q = cutAt(q, " after:")
	q = cutAt(q, " 請問")
	return q
}
|