package placement import ( "context" "fmt" "strings" "time" libbrave "haixun-backend/internal/library/brave" libkg "haixun-backend/internal/library/knowledge" ) const ( relevanceLimitPerTag = 12 recencyLimitPerTag = 8 ) type ScanCandidate struct { Permalink string ExternalID string Author string Text string SearchTag string QueryDimension QueryDimension GraphNodeID string ProductFitScore int Source DiscoverChannel HasRelevance bool HasRecency bool Priority string LikeCount int ReplyCount int EngagementScore int PlacementScore int SolvedByProduct bool PostedAt string Replies []ReplyCandidate } type DualTrackInput struct { Nodes []libkg.Node PatrolKeywords []string Exclusions []string Member MemberContext Client *libbrave.Client Crawler CrawlerSearchFn Limit int // max queries budget; 0 = default OnCheckpoint func(candidates []ScanCandidate) error } type DualTrackProgress func(message string, pct int) // CollectTagQueries builds crawl jobs from selected graph nodes. func CollectTagQueries(nodes []libkg.Node) []TagQuery { out := make([]TagQuery, 0, len(nodes)*4) for _, node := range nodes { if !node.SelectedForScan { continue } fit := node.ProductFitScore for _, tag := range node.DerivedTags.Relevance { tag = strings.TrimSpace(tag) if tag == "" { continue } q := BuildRelevanceQuery(tag) if q == "" { continue } out = append(out, TagQuery{ Tag: tag, Query: q, Dimension: QueryRelevance, GraphNodeID: node.ID, ProductFitScore: fit, }) } for _, tag := range node.DerivedTags.Recency { tag = strings.TrimSpace(tag) if tag == "" { continue } q7 := BuildRecencyQuery(tag, IdealMaxPostAgeDays) if q7 != "" { out = append(out, TagQuery{ Tag: tag, Query: q7, Dimension: QueryRecency, GraphNodeID: node.ID, ProductFitScore: fit, RecencyDays: IdealMaxPostAgeDays, }) } q30 := BuildRecencyQuery(tag, MaxPostAgeDays) if q30 != "" && q30 != q7 { out = append(out, TagQuery{ Tag: tag, Query: q30, Dimension: QueryRecency, GraphNodeID: node.ID, ProductFitScore: fit, RecencyDays: MaxPostAgeDays, }) } } } return out } // RunDualTrackDiscover executes relevance + recency queries and merges by permalink. func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) { queries := ResolveTagQueries(input.Nodes, input.PatrolKeywords) if len(queries) == 0 { if len(input.PatrolKeywords) > 0 { return nil, fmt.Errorf("海巡關鍵字格式無效,請改用 2~8 字的真人搜尋短句") } return nil, fmt.Errorf("沒有勾選的節點或可用 tag") } merged := map[string]*ScanCandidate{} order := make([]string, 0, 64) runQuery := func(tq TagQuery, limit int) error { posts, channel, err := discoverForQuery(ctx, input, tq, limit) if err != nil { return err } for _, post := range posts { if MatchesExclusion(post.Text, input.Exclusions) { continue } if !PassesPlacementFilter(post.Text) { continue } key := post.Permalink if key == "" { continue } existing, ok := merged[key] if !ok { priority := "relevant" if tq.Dimension == QueryRecency { priority = "recent" } extID := post.ExternalID if extID == "" { if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok { extID = parsed.ExternalID } } merged[key] = &ScanCandidate{ Permalink: post.Permalink, ExternalID: extID, Author: post.Author, Text: post.Text, SearchTag: tq.Tag, QueryDimension: tq.Dimension, GraphNodeID: tq.GraphNodeID, ProductFitScore: tq.ProductFitScore, Source: channel, HasRelevance: tq.Dimension == QueryRelevance, HasRecency: tq.Dimension == QueryRecency, Priority: priority, PlacementScore: computePlacementScore(post.Text, tq.ProductFitScore, tq.Dimension == QueryRecency), SolvedByProduct: tq.ProductFitScore >= 55, PostedAt: strings.TrimSpace(post.PostedAt), } order = append(order, key) continue } if tq.Dimension == QueryRelevance { existing.HasRelevance = true } if tq.Dimension == QueryRecency { existing.HasRecency = true } if tq.ProductFitScore > existing.ProductFitScore { existing.ProductFitScore = tq.ProductFitScore existing.SolvedByProduct = tq.ProductFitScore >= 55 } if strings.TrimSpace(existing.PostedAt) == "" && strings.TrimSpace(post.PostedAt) != "" { existing.PostedAt = strings.TrimSpace(post.PostedAt) } } return nil } total := len(queries) for i, tq := range queries { if onProgress != nil { pct := 10 + ((i + 1) * 75 / max(total, 1)) onProgress(fmt.Sprintf("雙軌海巡 %d/%d:%s", i+1, total, tq.Tag), pct) } limit := relevanceLimitPerTag if tq.Dimension == QueryRecency { limit = recencyLimitPerTag } if err := runQuery(tq, limit); err != nil { return nil, err } if input.OnCheckpoint != nil { snapshot := snapshotMergedCandidates(merged, order, false) if err := input.OnCheckpoint(snapshot); err != nil { return nil, err } } if input.Member.AllowsCrawler && input.Member.DevMode && i < total-1 { if err := politeDiscoverPause(ctx); err != nil { return nil, err } } } out := snapshotMergedCandidates(merged, order, true) if onProgress != nil { onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90) } return out, nil } func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) { req := DiscoverRequest{ Query: tq.Query, Keyword: tq.Tag, Recency: tq.Dimension == QueryRecency, Limit: limit, Member: input.Member, Crawler: input.Crawler, } posts, channel, err := Discover(ctx, req) if err == nil && len(posts) > 0 { return posts, channel, nil } if input.Client == nil || !input.Client.Enabled() { if err != nil { return nil, "", err } return nil, "", fmt.Errorf("Brave 未設定且 Threads API 無結果") } bravePosts, berr := discoverViaBrave(ctx, input.Client, input.Member, tq.Query, limit) if berr != nil { if err != nil { return nil, "", err } return nil, "", berr } return bravePosts, DiscoverBrave, nil } func discoverViaBrave(ctx context.Context, client *libbrave.Client, member MemberContext, query string, limit int) ([]DiscoverPost, error) { res, err := client.Search(ctx, libbrave.SearchOptions{ Query: query, Limit: limit, Mode: libbrave.ModeThreadsDiscover, Country: member.BraveCountry, SearchLang: member.BraveSearchLang, }) if err != nil { return nil, err } if res.Status != "success" || len(res.Results) == 0 { return nil, nil } out := make([]DiscoverPost, 0, len(res.Results)) for _, item := range res.Results { parsed, ok := ParseThreadsPostFromWebResult(item.Title, item.Snippet, item.URL) if !ok { continue } out = append(out, DiscoverPost{ Text: parsed.Text, Permalink: parsed.Permalink, ExternalID: parsed.ExternalID, Author: parsed.Author, Source: DiscoverBrave, }) } return out, nil } func snapshotMergedCandidates(merged map[string]*ScanCandidate, order []string, applyFinalFilter bool) []ScanCandidate { out := make([]ScanCandidate, 0, len(order)) for _, key := range order { item := merged[key] finalizeScanCandidate(item) if applyFinalFilter && item.ProductFitScore < 30 && item.Priority != "gold" { continue } out = append(out, *item) } return out } func finalizeScanCandidate(item *ScanCandidate) { if item == nil { return } if item.HasRelevance && item.HasRecency && item.ProductFitScore >= 45 { item.Priority = "gold" } else if item.HasRecency { item.Priority = "recent" } else { item.Priority = "relevant" } item.PlacementScore = computePlacementScore(item.Text, item.ProductFitScore, item.HasRecency) item.SolvedByProduct = item.ProductFitScore >= 55 } func computePlacementScore(text string, productFit int, recent bool) int { score := 30 + productFit/4 if HasPlacementIntent(text) { score += 20 } if LooksLikeRecommendationPost(text) { score += 12 } if recent { score += 15 } if productFit >= 60 { score += 8 } if score > 100 { return 100 } return score } func max(a, b int) int { if a > b { return a } return b } func politeDiscoverPause(ctx context.Context) error { wait := 2*time.Second + jitterDuration(2*time.Second) timer := time.NewTimer(wait) defer timer.Stop() select { case <-ctx.Done(): return ctx.Err() case <-timer.C: return nil } }