haixunMaster/haixun-backend/internal/library/placement/dual_track.go

301 lines
7.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package placement
import (
"context"
"fmt"
"strings"
libbrave "haixun-backend/internal/library/brave"
libkg "haixun-backend/internal/library/knowledge"
)
// Per-query result caps that RunDualTrackDiscover hands to discoverForQuery.
const (
	// relevanceLimitPerTag is the result limit requested for each relevance query.
	relevanceLimitPerTag = 12
	// recencyLimitPerTag is the result limit requested for each recency query.
	recencyLimitPerTag = 8
)
// ScanCandidate is one post produced by RunDualTrackDiscover after results
// from all relevance and recency queries have been merged by permalink.
type ScanCandidate struct {
	// Permalink is the post URL; it is the key used to merge duplicate hits.
	Permalink string
	// ExternalID is the post's external identifier. When the discover result
	// carries none, it is taken from ParseThreadsPostFromWebResult.
	ExternalID string
	// Author is copied from the discovered post.
	Author string
	// Text is copied from the discovered post.
	Text string

	// SearchTag is the tag of the first query that surfaced the post.
	SearchTag string
	// QueryDimension is the dimension of the first query that surfaced the post.
	QueryDimension QueryDimension
	// GraphNodeID is the graph node ID of the first query that surfaced the post.
	GraphNodeID string
	// ProductFitScore is the highest product-fit score among all queries
	// that surfaced the post.
	ProductFitScore int
	// Source is the discover channel that returned the post first.
	Source DiscoverChannel

	// HasRelevance reports whether any relevance query surfaced the post.
	HasRelevance bool
	// HasRecency reports whether any recency query surfaced the post.
	HasRecency bool
	// Priority is "gold", "recent" or "relevant".
	Priority string

	// LikeCount is not populated by RunDualTrackDiscover.
	// NOTE(review): presumably filled by a later enrichment step; confirm.
	LikeCount int
	// ReplyCount is not populated by RunDualTrackDiscover.
	ReplyCount int
	// EngagementScore is not populated by RunDualTrackDiscover.
	EngagementScore int

	// PlacementScore is the value returned by computePlacementScore (at most 100).
	PlacementScore int
	// SolvedByProduct is true when ProductFitScore is at least 55.
	SolvedByProduct bool
	// Replies is not populated by RunDualTrackDiscover.
	Replies []ReplyCandidate
}
// DualTrackInput bundles everything RunDualTrackDiscover needs for one run.
type DualTrackInput struct {
	// Nodes are the knowledge-graph nodes whose derived tags become queries;
	// only nodes with SelectedForScan set contribute.
	Nodes []libkg.Node
	// Exclusions is passed to MatchesExclusion together with each post's
	// text; matching posts are dropped.
	Exclusions []string
	// Member is forwarded to the discover request and supplies the Brave
	// country and search-language settings.
	Member MemberContext
	// Client is the Brave client used as a fallback; it may be nil, in which
	// case no Brave fallback is attempted.
	Client *libbrave.Client
	// Crawler is forwarded unchanged in the discover request.
	Crawler CrawlerSearchFn
	// Limit is the maximum query budget; 0 means the default.
	Limit int
}
// DualTrackProgress is the callback RunDualTrackDiscover uses to report
// progress. message is a human-readable status line and pct is a progress
// value; RunDualTrackDiscover reports values between 10 and 90.
type DualTrackProgress func(message string, pct int)
// CollectTagQueries expands the derived tags of every node selected for
// scanning into crawl queries.
//
// Tags are whitespace-trimmed and blank tags are skipped. Each relevance tag
// yields one QueryRelevance query. Each recency tag yields up to two
// QueryRecency queries: one built for IdealMaxPostAgeDays and one built for
// MaxPostAgeDays, the latter only when it differs from the former. A tag whose
// builder returns an empty query produces nothing for that query. Every query
// carries the ID and ProductFitScore of the node it came from.
func CollectTagQueries(nodes []libkg.Node) []TagQuery {
	queries := make([]TagQuery, 0, len(nodes)*4)

	for i := range nodes {
		node := &nodes[i]
		if !node.SelectedForScan {
			continue
		}
		fit := node.ProductFitScore

		for _, raw := range node.DerivedTags.Relevance {
			tag := strings.TrimSpace(raw)
			if tag == "" {
				continue
			}
			if query := BuildRelevanceQuery(tag); query != "" {
				queries = append(queries, TagQuery{
					Tag:             tag,
					Query:           query,
					Dimension:       QueryRelevance,
					GraphNodeID:     node.ID,
					ProductFitScore: fit,
				})
			}
		}

		for _, raw := range node.DerivedTags.Recency {
			tag := strings.TrimSpace(raw)
			if tag == "" {
				continue
			}
			// Fields shared by both recency windows of this tag.
			template := TagQuery{
				Tag:             tag,
				Dimension:       QueryRecency,
				GraphNodeID:     node.ID,
				ProductFitScore: fit,
			}

			idealQuery := BuildRecencyQuery(tag, IdealMaxPostAgeDays)
			if idealQuery != "" {
				ideal := template
				ideal.Query = idealQuery
				ideal.RecencyDays = IdealMaxPostAgeDays
				queries = append(queries, ideal)
			}

			// The wider window is only worth a second request when it
			// produces a different query string.
			widestQuery := BuildRecencyQuery(tag, MaxPostAgeDays)
			if widestQuery == "" || widestQuery == idealQuery {
				continue
			}
			widest := template
			widest.Query = widestQuery
			widest.RecencyDays = MaxPostAgeDays
			queries = append(queries, widest)
		}
	}

	return queries
}
// RunDualTrackDiscover executes the relevance and recency queries derived from
// input.Nodes and merges the discovered posts by permalink.
//
// Queries come from CollectTagQueries. When input.Limit is positive, only the
// first input.Limit queries are run; zero or a negative value runs them all.
// Each query is fetched through discoverForQuery with relevanceLimitPerTag or
// recencyLimitPerTag as its result limit.
//
// Posts that match input.Exclusions, fail PassesPlacementFilter, or have an
// empty permalink are dropped. A post returned by several queries keeps the
// text, tag, node and channel of its first hit, while HasRelevance/HasRecency
// accumulate and ProductFitScore is raised to the highest score seen.
//
// After all queries have run, candidates with ProductFitScore below 30 are
// discarded and the rest receive their final Priority, PlacementScore and
// SolvedByProduct. Results are returned in first-seen order.
//
// onProgress may be nil. An error is returned when no query can be built, or
// as soon as any single query fails; in the latter case results gathered so
// far are discarded.
func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) {
	queries := CollectTagQueries(input.Nodes)
	if len(queries) == 0 {
		return nil, fmt.Errorf("沒有勾選的節點或可用 tag")
	}
	// Enforce the caller's query budget. Limit was previously accepted but
	// never applied, so large node selections ran an unbounded number of
	// queries.
	if input.Limit > 0 && len(queries) > input.Limit {
		queries = queries[:input.Limit]
	}

	byPermalink := make(map[string]*ScanCandidate)
	ordered := make([]*ScanCandidate, 0, 64)
	total := len(queries) // at least 1 here, so the division below is safe

	for i, tq := range queries {
		if onProgress != nil {
			pct := 10 + (i+1)*75/total
			// The separator keeps the total from running into the tag text.
			onProgress(fmt.Sprintf("雙軌海巡 %d/%d: %s", i+1, total, tq.Tag), pct)
		}

		limit := relevanceLimitPerTag
		if tq.Dimension == QueryRecency {
			limit = recencyLimitPerTag
		}
		posts, channel, err := discoverForQuery(ctx, input, tq, limit)
		if err != nil {
			return nil, err
		}

		for _, post := range posts {
			if MatchesExclusion(post.Text, input.Exclusions) || !PassesPlacementFilter(post.Text) {
				continue
			}
			if post.Permalink == "" {
				continue
			}

			cand, seen := byPermalink[post.Permalink]
			if !seen {
				cand = newDualTrackCandidate(post, tq, channel)
				byPermalink[post.Permalink] = cand
				ordered = append(ordered, cand)
				continue
			}

			// Repeat hit: record the extra dimension and keep the best fit.
			switch tq.Dimension {
			case QueryRelevance:
				cand.HasRelevance = true
			case QueryRecency:
				cand.HasRecency = true
			}
			if tq.ProductFitScore > cand.ProductFitScore {
				cand.ProductFitScore = tq.ProductFitScore
			}
		}
	}

	out := finalizeDualTrackCandidates(ordered)
	if onProgress != nil {
		onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90)
	}
	return out, nil
}

// newDualTrackCandidate builds the candidate for the first hit of a post.
// Priority, PlacementScore and SolvedByProduct are left unset because
// finalizeDualTrackCandidates derives them once the merged state is known.
func newDualTrackCandidate(post DiscoverPost, tq TagQuery, channel DiscoverChannel) *ScanCandidate {
	extID := post.ExternalID
	if extID == "" {
		// Fall back to whatever ID can be parsed out of the permalink.
		if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok {
			extID = parsed.ExternalID
		}
	}
	return &ScanCandidate{
		Permalink:       post.Permalink,
		ExternalID:      extID,
		Author:          post.Author,
		Text:            post.Text,
		SearchTag:       tq.Tag,
		QueryDimension:  tq.Dimension,
		GraphNodeID:     tq.GraphNodeID,
		ProductFitScore: tq.ProductFitScore,
		Source:          channel,
		HasRelevance:    tq.Dimension == QueryRelevance,
		HasRecency:      tq.Dimension == QueryRecency,
	}
}

// finalizeDualTrackCandidates drops low-fit candidates and assigns the final
// Priority, PlacementScore and SolvedByProduct to the rest, preserving order.
func finalizeDualTrackCandidates(ordered []*ScanCandidate) []ScanCandidate {
	// Product-fit thresholds: minimum to be kept, minimum for "gold", and
	// minimum to count as solved by the product.
	const (
		minFit    = 30
		goldFit   = 45
		solvedFit = 55
	)

	out := make([]ScanCandidate, 0, len(ordered))
	for _, cand := range ordered {
		// A candidate below minFit can never reach goldFit, so the fit
		// check alone decides whether it is kept.
		if cand.ProductFitScore < minFit {
			continue
		}
		switch {
		case cand.HasRelevance && cand.HasRecency && cand.ProductFitScore >= goldFit:
			cand.Priority = "gold"
		case cand.HasRecency:
			cand.Priority = "recent"
		default:
			cand.Priority = "relevant"
		}
		cand.PlacementScore = computePlacementScore(cand.Text, cand.ProductFitScore, cand.HasRecency)
		cand.SolvedByProduct = cand.ProductFitScore >= solvedFit
		out = append(out, *cand)
	}
	return out
}
// discoverForQuery fetches posts for a single tag query.
//
// It first calls Discover. If that succeeds with at least one post, those
// posts and their channel are returned. Otherwise it falls back to Brave via
// discoverViaBrave, provided input.Client is non-nil and enabled.
//
// Error precedence: an error from Discover is preferred over any fallback
// error. When Discover returned no posts without an error and Brave is
// unavailable, a dedicated error is returned. A successful Brave call is
// returned as-is with the DiscoverBrave channel, even if it found no posts.
func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) {
	primaryPosts, primaryChannel, primaryErr := Discover(ctx, DiscoverRequest{
		Query:   tq.Query,
		Keyword: tq.Tag,
		Recency: tq.Dimension == QueryRecency,
		Limit:   limit,
		Member:  input.Member,
		Crawler: input.Crawler,
	})
	if primaryErr == nil && len(primaryPosts) != 0 {
		return primaryPosts, primaryChannel, nil
	}

	braveReady := input.Client != nil && input.Client.Enabled()
	if !braveReady {
		if primaryErr == nil {
			return nil, "", fmt.Errorf("Brave 未設定且 Threads API 無結果")
		}
		return nil, "", primaryErr
	}

	fallbackPosts, fallbackErr := discoverViaBrave(ctx, input.Client, input.Member, tq.Query, limit)
	switch {
	case fallbackErr == nil:
		return fallbackPosts, DiscoverBrave, nil
	case primaryErr != nil:
		// Both paths failed; surface the primary failure.
		return nil, "", primaryErr
	default:
		return nil, "", fallbackErr
	}
}
// discoverViaBrave runs query through the Brave client in
// ModeThreadsDiscover, using the member's country and search language, and
// converts the web results into DiscoverPost values.
//
// A search error is returned unchanged. A response whose status is not
// "success", or that has no results, yields (nil, nil). Results that
// ParseThreadsPostFromWebResult rejects are skipped, so the returned slice may
// be empty even when the search returned results.
func discoverViaBrave(ctx context.Context, client *libbrave.Client, member MemberContext, query string, limit int) ([]DiscoverPost, error) {
	opts := libbrave.SearchOptions{
		Query:      query,
		Limit:      limit,
		Mode:       libbrave.ModeThreadsDiscover,
		Country:    member.BraveCountry,
		SearchLang: member.BraveSearchLang,
	}
	res, err := client.Search(ctx, opts)
	if err != nil {
		return nil, err
	}

	succeeded := res.Status == "success"
	if !succeeded || len(res.Results) == 0 {
		return nil, nil
	}

	posts := make([]DiscoverPost, 0, len(res.Results))
	for _, hit := range res.Results {
		if parsed, ok := ParseThreadsPostFromWebResult(hit.Title, hit.Snippet, hit.URL); ok {
			posts = append(posts, DiscoverPost{
				Text:       parsed.Text,
				Permalink:  parsed.Permalink,
				ExternalID: parsed.ExternalID,
				Author:     parsed.Author,
				Source:     DiscoverBrave,
			})
		}
	}
	return posts, nil
}
// computePlacementScore rates a post for placement, returning at most 100.
//
// The score starts at 30 plus a quarter of productFit (integer division) and
// gains 8 when productFit is at least 60, 15 when recent is true, 20 when
// HasPlacementIntent(text) holds and 12 when LooksLikeRecommendationPost(text)
// holds. There is no lower clamp.
func computePlacementScore(text string, productFit int, recent bool) int {
	const ceiling = 100

	total := 30 + productFit/4
	if productFit >= 60 {
		total += 8
	}
	if recent {
		total += 15
	}
	if HasPlacementIntent(text) {
		total += 20
	}
	if LooksLikeRecommendationPost(text) {
		total += 12
	}

	if total <= ceiling {
		return total
	}
	return ceiling
}
// max returns the larger of a and b. Being declared at package level, it takes
// precedence over the predeclared max of Go 1.21+ within this package.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}