package placement import ( "context" "fmt" "strings" "time" libkg "haixun-backend/internal/library/knowledge" "haixun-backend/internal/library/websearch" ) const ( relevanceLimitPerTag = 12 recencyLimitPerTag = 8 ) type ScanCandidate struct { Permalink string ExternalID string Author string Text string SearchTag string QueryDimension QueryDimension GraphNodeID string ProductFitScore int Source DiscoverChannel HasRelevance bool HasRecency bool Priority string AuthorVerified bool FollowerCount int LikeCount int ReplyCount int EngagementScore int PlacementScore int SolvedByProduct bool PostedAt string Replies []ReplyCandidate } type DualTrackInput struct { Nodes []libkg.Node PatrolKeywords []string Exclusions []string Member MemberContext WebSearch websearch.Client Crawler CrawlerSearchFn Limit int // max queries budget; 0 = default OnCheckpoint func(candidates []ScanCandidate) error } type DualTrackProgress func(message string, pct int) // CollectTagQueries builds crawl jobs from selected graph nodes. func CollectTagQueries(nodes []libkg.Node, provider websearch.Provider) []TagQuery { out := make([]TagQuery, 0, len(nodes)*4) for _, node := range nodes { if !node.SelectedForScan { continue } fit := node.ProductFitScore derived := node.DerivedTags if len(derived.Relevance) == 0 && len(derived.Recency) == 0 { derived = libkg.DerivePatrolTagsForNode(node, libkg.PatrolTagInput{}) } for _, tag := range derived.Relevance { tag = strings.TrimSpace(tag) if tag == "" { continue } q := BuildRelevanceQuery(provider, tag) if q == "" { continue } out = append(out, TagQuery{ Tag: tag, Query: q, Dimension: QueryRelevance, GraphNodeID: node.ID, ProductFitScore: fit, }) } for _, tag := range derived.Recency { tag = strings.TrimSpace(tag) if tag == "" { continue } q7 := BuildRecencyQuery(provider, tag, IdealMaxPostAgeDays) if q7 != "" { out = append(out, TagQuery{ Tag: tag, Query: q7, Dimension: QueryRecency, GraphNodeID: node.ID, ProductFitScore: fit, RecencyDays: IdealMaxPostAgeDays, }) } q30 := BuildRecencyQuery(provider, tag, MaxPostAgeDays) if q30 != "" && q30 != q7 { out = append(out, TagQuery{ Tag: tag, Query: q30, Dimension: QueryRecency, GraphNodeID: node.ID, ProductFitScore: fit, RecencyDays: MaxPostAgeDays, }) } } } return out } // RunDualTrackDiscover executes relevance + recency queries and merges by permalink. func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) { queries := ResolveTagQueries(input.Nodes, input.PatrolKeywords, input.Member.WebSearchProviderEnum()) if len(queries) == 0 { if len(input.PatrolKeywords) > 0 { return nil, fmt.Errorf("海巡關鍵字格式無效,請改用 2~8 字的真人搜尋短句") } selected := 0 for _, node := range input.Nodes { if node.SelectedForScan { selected++ } } if selected > 0 { return nil, fmt.Errorf("已勾選節點但沒有可用的海巡 tag,請重新擴展圖譜或手動編輯 tag") } return nil, fmt.Errorf("請先勾選要海巡的節點並儲存") } merged := map[string]*ScanCandidate{} order := make([]string, 0, 64) runQuery := func(tq TagQuery, limit int) error { posts, channel, err := discoverForQuery(ctx, input, tq, limit) if err != nil { return err } for _, post := range posts { if MatchesExclusion(post.Text, input.Exclusions) { continue } if !PassesPlacementFilter(post.Text) { continue } key := post.Permalink if key == "" { continue } existing, ok := merged[key] if !ok { priority := "relevant" if tq.Dimension == QueryRecency { priority = "recent" } extID := post.ExternalID if extID == "" { if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok { extID = parsed.ExternalID } } merged[key] = &ScanCandidate{ Permalink: post.Permalink, ExternalID: extID, Author: post.Author, AuthorVerified: post.AuthorVerified, FollowerCount: post.FollowerCount, Text: post.Text, SearchTag: tq.Tag, QueryDimension: tq.Dimension, GraphNodeID: tq.GraphNodeID, ProductFitScore: tq.ProductFitScore, Source: channel, HasRelevance: tq.Dimension == QueryRelevance, HasRecency: tq.Dimension == QueryRecency, Priority: priority, LikeCount: post.LikeCount, ReplyCount: post.ReplyCount, PlacementScore: computePlacementScore(post.Text, tq.ProductFitScore, tq.Dimension == QueryRecency), SolvedByProduct: tq.ProductFitScore >= 55, PostedAt: strings.TrimSpace(post.PostedAt), } order = append(order, key) continue } if tq.Dimension == QueryRelevance { existing.HasRelevance = true } if tq.Dimension == QueryRecency { existing.HasRecency = true } if tq.ProductFitScore > existing.ProductFitScore { existing.ProductFitScore = tq.ProductFitScore existing.SolvedByProduct = tq.ProductFitScore >= 55 } if strings.TrimSpace(existing.PostedAt) == "" && strings.TrimSpace(post.PostedAt) != "" { existing.PostedAt = strings.TrimSpace(post.PostedAt) } } return nil } total := len(queries) for i, tq := range queries { if onProgress != nil { pct := 10 + ((i + 1) * 75 / max(total, 1)) onProgress(fmt.Sprintf("雙軌海巡 %d/%d:%s", i+1, total, tq.Tag), pct) } limit := relevanceLimitPerTag if tq.Dimension == QueryRecency { limit = recencyLimitPerTag } if err := runQuery(tq, limit); err != nil { return nil, err } if input.OnCheckpoint != nil { snapshot := snapshotMergedCandidates(merged, order, false) if err := input.OnCheckpoint(snapshot); err != nil { return nil, err } } if input.Member.AllowsCrawler && input.Member.BrowserConnected && i < total-1 { if err := politeDiscoverPause(ctx); err != nil { return nil, err } } } out := snapshotMergedCandidates(merged, order, true) if onProgress != nil { onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90) } return out, nil } func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) { req := DiscoverRequest{ Query: tq.Query, Keyword: tq.Tag, Recency: tq.Dimension == QueryRecency, Limit: limit, Member: input.Member, Crawler: input.Crawler, } posts, channel, err := Discover(ctx, req) if err == nil && len(posts) > 0 { return posts, channel, nil } if input.WebSearch == nil || !input.WebSearch.Enabled() { if err != nil { return nil, "", err } return nil, "", fmt.Errorf("%s 未設定且 Threads API 無結果", input.Member.WebSearchProviderLabel()) } webPosts, werr := discoverViaWebSearch(ctx, input.WebSearch, input.Member, tq, limit) if werr != nil { if err != nil { return nil, "", err } return nil, "", werr } return webPosts, input.Member.WebSearchDiscoverChannel(), nil } func discoverViaWebSearch(ctx context.Context, client websearch.Client, member MemberContext, tq TagQuery, limit int) ([]DiscoverPost, error) { res, err := client.Search(ctx, websearch.SearchOptions{ Query: tq.Query, Limit: limit, Mode: websearch.ModeThreadsDiscover, Country: member.BraveCountry, SearchLang: member.BraveSearchLang, UserLocation: member.ExaUserLocation, StartPublishedDate: PublishedAfterForRecency(member.WebSearchProviderEnum(), tq.RecencyDays), }) if err != nil { return nil, err } if res.Status != "success" || len(res.Results) == 0 { return nil, nil } source := member.WebSearchDiscoverChannel() out := make([]DiscoverPost, 0, len(res.Results)) for _, item := range res.Results { parsed, ok := ParseThreadsPostFromWebResult(item.Title, item.Snippet, item.URL) if !ok { continue } out = append(out, DiscoverPost{ Text: parsed.Text, Permalink: parsed.Permalink, ExternalID: parsed.ExternalID, Author: parsed.Author, Source: source, }) } return out, nil } func snapshotMergedCandidates(merged map[string]*ScanCandidate, order []string, applyFinalFilter bool) []ScanCandidate { out := make([]ScanCandidate, 0, len(order)) for _, key := range order { item := merged[key] finalizeScanCandidate(item) if applyFinalFilter && item.ProductFitScore < 30 && item.Priority != "gold" { continue } out = append(out, *item) } return out } func finalizeScanCandidate(item *ScanCandidate) { if item == nil { return } if item.HasRelevance && item.HasRecency && item.ProductFitScore >= 45 { item.Priority = "gold" } else if item.HasRecency { item.Priority = "recent" } else { item.Priority = "relevant" } item.PlacementScore = computePlacementScore(item.Text, item.ProductFitScore, item.HasRecency) item.SolvedByProduct = item.ProductFitScore >= 55 } func computePlacementScore(text string, productFit int, recent bool) int { score := 30 + productFit/4 if HasPlacementIntent(text) { score += 20 } if LooksLikeRecommendationPost(text) { score += 12 } if recent { score += 15 } if productFit >= 60 { score += 8 } if score > 100 { return 100 } return score } func max(a, b int) int { if a > b { return a } return b } func politeDiscoverPause(ctx context.Context) error { wait := 2*time.Second + jitterDuration(2*time.Second) timer := time.NewTimer(wait) defer timer.Stop() select { case <-ctx.Done(): return ctx.Err() case <-timer.C: return nil } }