2026-06-24 10:02:42 +00:00
|
|
|
|
package placement
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"context"
|
|
|
|
|
|
"fmt"
|
|
|
|
|
|
"strings"
|
2026-06-24 16:48:56 +00:00
|
|
|
|
"time"
|
2026-06-24 10:02:42 +00:00
|
|
|
|
|
|
|
|
|
|
libbrave "haixun-backend/internal/library/brave"
|
|
|
|
|
|
libkg "haixun-backend/internal/library/knowledge"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
// relevanceLimitPerTag is the per-query result limit RunDualTrackDiscover
// requests for queries that are not recency queries.
const relevanceLimitPerTag = 12

// recencyLimitPerTag is the per-query result limit RunDualTrackDiscover
// requests for recency queries.
const recencyLimitPerTag = 8
|
|
|
|
|
|
|
|
|
|
|
|
type ScanCandidate struct {
|
|
|
|
|
|
Permalink string
|
|
|
|
|
|
ExternalID string
|
|
|
|
|
|
Author string
|
|
|
|
|
|
Text string
|
|
|
|
|
|
SearchTag string
|
|
|
|
|
|
QueryDimension QueryDimension
|
|
|
|
|
|
GraphNodeID string
|
|
|
|
|
|
ProductFitScore int
|
|
|
|
|
|
Source DiscoverChannel
|
|
|
|
|
|
HasRelevance bool
|
|
|
|
|
|
HasRecency bool
|
|
|
|
|
|
Priority string
|
|
|
|
|
|
LikeCount int
|
|
|
|
|
|
ReplyCount int
|
|
|
|
|
|
EngagementScore int
|
|
|
|
|
|
PlacementScore int
|
|
|
|
|
|
SolvedByProduct bool
|
2026-06-24 16:48:56 +00:00
|
|
|
|
PostedAt string
|
2026-06-24 10:02:42 +00:00
|
|
|
|
Replies []ReplyCandidate
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
type DualTrackInput struct {
|
2026-06-24 16:48:56 +00:00
|
|
|
|
Nodes []libkg.Node
|
|
|
|
|
|
PatrolKeywords []string
|
|
|
|
|
|
Exclusions []string
|
|
|
|
|
|
Member MemberContext
|
|
|
|
|
|
Client *libbrave.Client
|
|
|
|
|
|
Crawler CrawlerSearchFn
|
|
|
|
|
|
Limit int // max queries budget; 0 = default
|
|
|
|
|
|
OnCheckpoint func(candidates []ScanCandidate) error
|
2026-06-24 10:02:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// DualTrackProgress is a progress callback: it receives a status message and
// a completion percentage. RunDualTrackDiscover invokes it once per query and
// once more after the final merge.
type DualTrackProgress func(message string, pct int)
|
|
|
|
|
|
|
|
|
|
|
|
// CollectTagQueries builds crawl jobs from selected graph nodes.
|
|
|
|
|
|
func CollectTagQueries(nodes []libkg.Node) []TagQuery {
|
|
|
|
|
|
out := make([]TagQuery, 0, len(nodes)*4)
|
|
|
|
|
|
for _, node := range nodes {
|
|
|
|
|
|
if !node.SelectedForScan {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
fit := node.ProductFitScore
|
|
|
|
|
|
for _, tag := range node.DerivedTags.Relevance {
|
|
|
|
|
|
tag = strings.TrimSpace(tag)
|
|
|
|
|
|
if tag == "" {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
q := BuildRelevanceQuery(tag)
|
|
|
|
|
|
if q == "" {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
out = append(out, TagQuery{
|
|
|
|
|
|
Tag: tag,
|
|
|
|
|
|
Query: q,
|
|
|
|
|
|
Dimension: QueryRelevance,
|
|
|
|
|
|
GraphNodeID: node.ID,
|
|
|
|
|
|
ProductFitScore: fit,
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
for _, tag := range node.DerivedTags.Recency {
|
|
|
|
|
|
tag = strings.TrimSpace(tag)
|
|
|
|
|
|
if tag == "" {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
q7 := BuildRecencyQuery(tag, IdealMaxPostAgeDays)
|
|
|
|
|
|
if q7 != "" {
|
|
|
|
|
|
out = append(out, TagQuery{
|
|
|
|
|
|
Tag: tag,
|
|
|
|
|
|
Query: q7,
|
|
|
|
|
|
Dimension: QueryRecency,
|
|
|
|
|
|
GraphNodeID: node.ID,
|
|
|
|
|
|
ProductFitScore: fit,
|
|
|
|
|
|
RecencyDays: IdealMaxPostAgeDays,
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
q30 := BuildRecencyQuery(tag, MaxPostAgeDays)
|
|
|
|
|
|
if q30 != "" && q30 != q7 {
|
|
|
|
|
|
out = append(out, TagQuery{
|
|
|
|
|
|
Tag: tag,
|
|
|
|
|
|
Query: q30,
|
|
|
|
|
|
Dimension: QueryRecency,
|
|
|
|
|
|
GraphNodeID: node.ID,
|
|
|
|
|
|
ProductFitScore: fit,
|
|
|
|
|
|
RecencyDays: MaxPostAgeDays,
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return out
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// RunDualTrackDiscover executes relevance + recency queries and merges by permalink.
|
|
|
|
|
|
func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) {
|
2026-06-24 16:48:56 +00:00
|
|
|
|
queries := ResolveTagQueries(input.Nodes, input.PatrolKeywords)
|
2026-06-24 10:02:42 +00:00
|
|
|
|
if len(queries) == 0 {
|
2026-06-24 16:48:56 +00:00
|
|
|
|
if len(input.PatrolKeywords) > 0 {
|
|
|
|
|
|
return nil, fmt.Errorf("海巡關鍵字格式無效,請改用 2~8 字的真人搜尋短句")
|
|
|
|
|
|
}
|
2026-06-24 10:02:42 +00:00
|
|
|
|
return nil, fmt.Errorf("沒有勾選的節點或可用 tag")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
merged := map[string]*ScanCandidate{}
|
|
|
|
|
|
order := make([]string, 0, 64)
|
|
|
|
|
|
|
|
|
|
|
|
runQuery := func(tq TagQuery, limit int) error {
|
|
|
|
|
|
posts, channel, err := discoverForQuery(ctx, input, tq, limit)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
for _, post := range posts {
|
|
|
|
|
|
if MatchesExclusion(post.Text, input.Exclusions) {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
if !PassesPlacementFilter(post.Text) {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
key := post.Permalink
|
|
|
|
|
|
if key == "" {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
existing, ok := merged[key]
|
|
|
|
|
|
if !ok {
|
|
|
|
|
|
priority := "relevant"
|
|
|
|
|
|
if tq.Dimension == QueryRecency {
|
|
|
|
|
|
priority = "recent"
|
|
|
|
|
|
}
|
|
|
|
|
|
extID := post.ExternalID
|
|
|
|
|
|
if extID == "" {
|
|
|
|
|
|
if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok {
|
|
|
|
|
|
extID = parsed.ExternalID
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
merged[key] = &ScanCandidate{
|
|
|
|
|
|
Permalink: post.Permalink,
|
|
|
|
|
|
ExternalID: extID,
|
|
|
|
|
|
Author: post.Author,
|
|
|
|
|
|
Text: post.Text,
|
|
|
|
|
|
SearchTag: tq.Tag,
|
|
|
|
|
|
QueryDimension: tq.Dimension,
|
|
|
|
|
|
GraphNodeID: tq.GraphNodeID,
|
|
|
|
|
|
ProductFitScore: tq.ProductFitScore,
|
|
|
|
|
|
Source: channel,
|
|
|
|
|
|
HasRelevance: tq.Dimension == QueryRelevance,
|
|
|
|
|
|
HasRecency: tq.Dimension == QueryRecency,
|
|
|
|
|
|
Priority: priority,
|
|
|
|
|
|
PlacementScore: computePlacementScore(post.Text, tq.ProductFitScore, tq.Dimension == QueryRecency),
|
|
|
|
|
|
SolvedByProduct: tq.ProductFitScore >= 55,
|
2026-06-24 16:48:56 +00:00
|
|
|
|
PostedAt: strings.TrimSpace(post.PostedAt),
|
2026-06-24 10:02:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
order = append(order, key)
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
if tq.Dimension == QueryRelevance {
|
|
|
|
|
|
existing.HasRelevance = true
|
|
|
|
|
|
}
|
|
|
|
|
|
if tq.Dimension == QueryRecency {
|
|
|
|
|
|
existing.HasRecency = true
|
|
|
|
|
|
}
|
|
|
|
|
|
if tq.ProductFitScore > existing.ProductFitScore {
|
|
|
|
|
|
existing.ProductFitScore = tq.ProductFitScore
|
|
|
|
|
|
existing.SolvedByProduct = tq.ProductFitScore >= 55
|
|
|
|
|
|
}
|
2026-06-24 16:48:56 +00:00
|
|
|
|
if strings.TrimSpace(existing.PostedAt) == "" && strings.TrimSpace(post.PostedAt) != "" {
|
|
|
|
|
|
existing.PostedAt = strings.TrimSpace(post.PostedAt)
|
|
|
|
|
|
}
|
2026-06-24 10:02:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
total := len(queries)
|
|
|
|
|
|
for i, tq := range queries {
|
|
|
|
|
|
if onProgress != nil {
|
|
|
|
|
|
pct := 10 + ((i + 1) * 75 / max(total, 1))
|
|
|
|
|
|
onProgress(fmt.Sprintf("雙軌海巡 %d/%d:%s", i+1, total, tq.Tag), pct)
|
|
|
|
|
|
}
|
|
|
|
|
|
limit := relevanceLimitPerTag
|
|
|
|
|
|
if tq.Dimension == QueryRecency {
|
|
|
|
|
|
limit = recencyLimitPerTag
|
|
|
|
|
|
}
|
|
|
|
|
|
if err := runQuery(tq, limit); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
2026-06-24 16:48:56 +00:00
|
|
|
|
if input.OnCheckpoint != nil {
|
|
|
|
|
|
snapshot := snapshotMergedCandidates(merged, order, false)
|
|
|
|
|
|
if err := input.OnCheckpoint(snapshot); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
2026-06-24 10:02:42 +00:00
|
|
|
|
}
|
2026-06-24 16:48:56 +00:00
|
|
|
|
if input.Member.AllowsCrawler && input.Member.DevMode && i < total-1 {
|
|
|
|
|
|
if err := politeDiscoverPause(ctx); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
2026-06-24 10:02:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-06-24 16:48:56 +00:00
|
|
|
|
|
|
|
|
|
|
out := snapshotMergedCandidates(merged, order, true)
|
2026-06-24 10:02:42 +00:00
|
|
|
|
if onProgress != nil {
|
|
|
|
|
|
onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90)
|
|
|
|
|
|
}
|
|
|
|
|
|
return out, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) {
|
|
|
|
|
|
req := DiscoverRequest{
|
|
|
|
|
|
Query: tq.Query,
|
|
|
|
|
|
Keyword: tq.Tag,
|
|
|
|
|
|
Recency: tq.Dimension == QueryRecency,
|
|
|
|
|
|
Limit: limit,
|
|
|
|
|
|
Member: input.Member,
|
|
|
|
|
|
Crawler: input.Crawler,
|
|
|
|
|
|
}
|
|
|
|
|
|
posts, channel, err := Discover(ctx, req)
|
|
|
|
|
|
if err == nil && len(posts) > 0 {
|
|
|
|
|
|
return posts, channel, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
if input.Client == nil || !input.Client.Enabled() {
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return nil, "", err
|
|
|
|
|
|
}
|
|
|
|
|
|
return nil, "", fmt.Errorf("Brave 未設定且 Threads API 無結果")
|
|
|
|
|
|
}
|
|
|
|
|
|
bravePosts, berr := discoverViaBrave(ctx, input.Client, input.Member, tq.Query, limit)
|
|
|
|
|
|
if berr != nil {
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return nil, "", err
|
|
|
|
|
|
}
|
|
|
|
|
|
return nil, "", berr
|
|
|
|
|
|
}
|
|
|
|
|
|
return bravePosts, DiscoverBrave, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func discoverViaBrave(ctx context.Context, client *libbrave.Client, member MemberContext, query string, limit int) ([]DiscoverPost, error) {
|
|
|
|
|
|
res, err := client.Search(ctx, libbrave.SearchOptions{
|
|
|
|
|
|
Query: query,
|
|
|
|
|
|
Limit: limit,
|
|
|
|
|
|
Mode: libbrave.ModeThreadsDiscover,
|
|
|
|
|
|
Country: member.BraveCountry,
|
|
|
|
|
|
SearchLang: member.BraveSearchLang,
|
|
|
|
|
|
})
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
if res.Status != "success" || len(res.Results) == 0 {
|
|
|
|
|
|
return nil, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
out := make([]DiscoverPost, 0, len(res.Results))
|
|
|
|
|
|
for _, item := range res.Results {
|
|
|
|
|
|
parsed, ok := ParseThreadsPostFromWebResult(item.Title, item.Snippet, item.URL)
|
|
|
|
|
|
if !ok {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
out = append(out, DiscoverPost{
|
|
|
|
|
|
Text: parsed.Text,
|
|
|
|
|
|
Permalink: parsed.Permalink,
|
|
|
|
|
|
ExternalID: parsed.ExternalID,
|
|
|
|
|
|
Author: parsed.Author,
|
|
|
|
|
|
Source: DiscoverBrave,
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
return out, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-24 16:48:56 +00:00
|
|
|
|
func snapshotMergedCandidates(merged map[string]*ScanCandidate, order []string, applyFinalFilter bool) []ScanCandidate {
|
|
|
|
|
|
out := make([]ScanCandidate, 0, len(order))
|
|
|
|
|
|
for _, key := range order {
|
|
|
|
|
|
item := merged[key]
|
|
|
|
|
|
finalizeScanCandidate(item)
|
|
|
|
|
|
if applyFinalFilter && item.ProductFitScore < 30 && item.Priority != "gold" {
|
|
|
|
|
|
continue
|
|
|
|
|
|
}
|
|
|
|
|
|
out = append(out, *item)
|
|
|
|
|
|
}
|
|
|
|
|
|
return out
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
func finalizeScanCandidate(item *ScanCandidate) {
|
|
|
|
|
|
if item == nil {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
if item.HasRelevance && item.HasRecency && item.ProductFitScore >= 45 {
|
|
|
|
|
|
item.Priority = "gold"
|
|
|
|
|
|
} else if item.HasRecency {
|
|
|
|
|
|
item.Priority = "recent"
|
|
|
|
|
|
} else {
|
|
|
|
|
|
item.Priority = "relevant"
|
|
|
|
|
|
}
|
|
|
|
|
|
item.PlacementScore = computePlacementScore(item.Text, item.ProductFitScore, item.HasRecency)
|
|
|
|
|
|
item.SolvedByProduct = item.ProductFitScore >= 55
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-24 10:02:42 +00:00
|
|
|
|
func computePlacementScore(text string, productFit int, recent bool) int {
|
|
|
|
|
|
score := 30 + productFit/4
|
|
|
|
|
|
if HasPlacementIntent(text) {
|
|
|
|
|
|
score += 20
|
|
|
|
|
|
}
|
|
|
|
|
|
if LooksLikeRecommendationPost(text) {
|
|
|
|
|
|
score += 12
|
|
|
|
|
|
}
|
|
|
|
|
|
if recent {
|
|
|
|
|
|
score += 15
|
|
|
|
|
|
}
|
|
|
|
|
|
if productFit >= 60 {
|
|
|
|
|
|
score += 8
|
|
|
|
|
|
}
|
|
|
|
|
|
if score > 100 {
|
|
|
|
|
|
return 100
|
|
|
|
|
|
}
|
|
|
|
|
|
return score
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// max returns the larger of a and b.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
2026-06-24 16:48:56 +00:00
|
|
|
|
|
|
|
|
|
|
func politeDiscoverPause(ctx context.Context) error {
|
|
|
|
|
|
wait := 2*time.Second + jitterDuration(2*time.Second)
|
|
|
|
|
|
timer := time.NewTimer(wait)
|
|
|
|
|
|
defer timer.Stop()
|
|
|
|
|
|
select {
|
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
|
return ctx.Err()
|
|
|
|
|
|
case <-timer.C:
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|