301 lines
7.5 KiB
Go
301 lines
7.5 KiB
Go
package placement
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"strings"
|
||
|
||
libbrave "haixun-backend/internal/library/brave"
|
||
libkg "haixun-backend/internal/library/knowledge"
|
||
)
|
||
|
||
// Per-query result limits used by RunDualTrackDiscover when it calls
// discoverForQuery.
const (
	// relevanceLimitPerTag is the limit requested for each relevance query.
	relevanceLimitPerTag = 12

	// recencyLimitPerTag is the limit requested for each recency query.
	recencyLimitPerTag = 8
)
|
||
|
||
type ScanCandidate struct {
|
||
Permalink string
|
||
ExternalID string
|
||
Author string
|
||
Text string
|
||
SearchTag string
|
||
QueryDimension QueryDimension
|
||
GraphNodeID string
|
||
ProductFitScore int
|
||
Source DiscoverChannel
|
||
HasRelevance bool
|
||
HasRecency bool
|
||
Priority string
|
||
LikeCount int
|
||
ReplyCount int
|
||
EngagementScore int
|
||
PlacementScore int
|
||
SolvedByProduct bool
|
||
Replies []ReplyCandidate
|
||
}
|
||
|
||
type DualTrackInput struct {
|
||
Nodes []libkg.Node
|
||
Exclusions []string
|
||
Member MemberContext
|
||
Client *libbrave.Client
|
||
Crawler CrawlerSearchFn
|
||
Limit int // max queries budget; 0 = default
|
||
}
|
||
|
||
// DualTrackProgress is an optional callback that receives a human-readable
// status message and an integer progress value. RunDualTrackDiscover invokes
// it before each query and once after merging, and skips it when nil.
type DualTrackProgress func(message string, pct int)
|
||
|
||
// CollectTagQueries builds crawl jobs from selected graph nodes.
|
||
func CollectTagQueries(nodes []libkg.Node) []TagQuery {
|
||
out := make([]TagQuery, 0, len(nodes)*4)
|
||
for _, node := range nodes {
|
||
if !node.SelectedForScan {
|
||
continue
|
||
}
|
||
fit := node.ProductFitScore
|
||
for _, tag := range node.DerivedTags.Relevance {
|
||
tag = strings.TrimSpace(tag)
|
||
if tag == "" {
|
||
continue
|
||
}
|
||
q := BuildRelevanceQuery(tag)
|
||
if q == "" {
|
||
continue
|
||
}
|
||
out = append(out, TagQuery{
|
||
Tag: tag,
|
||
Query: q,
|
||
Dimension: QueryRelevance,
|
||
GraphNodeID: node.ID,
|
||
ProductFitScore: fit,
|
||
})
|
||
}
|
||
for _, tag := range node.DerivedTags.Recency {
|
||
tag = strings.TrimSpace(tag)
|
||
if tag == "" {
|
||
continue
|
||
}
|
||
q7 := BuildRecencyQuery(tag, IdealMaxPostAgeDays)
|
||
if q7 != "" {
|
||
out = append(out, TagQuery{
|
||
Tag: tag,
|
||
Query: q7,
|
||
Dimension: QueryRecency,
|
||
GraphNodeID: node.ID,
|
||
ProductFitScore: fit,
|
||
RecencyDays: IdealMaxPostAgeDays,
|
||
})
|
||
}
|
||
q30 := BuildRecencyQuery(tag, MaxPostAgeDays)
|
||
if q30 != "" && q30 != q7 {
|
||
out = append(out, TagQuery{
|
||
Tag: tag,
|
||
Query: q30,
|
||
Dimension: QueryRecency,
|
||
GraphNodeID: node.ID,
|
||
ProductFitScore: fit,
|
||
RecencyDays: MaxPostAgeDays,
|
||
})
|
||
}
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
// RunDualTrackDiscover executes relevance + recency queries and merges by permalink.
|
||
func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) {
|
||
queries := CollectTagQueries(input.Nodes)
|
||
if len(queries) == 0 {
|
||
return nil, fmt.Errorf("沒有勾選的節點或可用 tag")
|
||
}
|
||
|
||
merged := map[string]*ScanCandidate{}
|
||
order := make([]string, 0, 64)
|
||
|
||
runQuery := func(tq TagQuery, limit int) error {
|
||
posts, channel, err := discoverForQuery(ctx, input, tq, limit)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
for _, post := range posts {
|
||
if MatchesExclusion(post.Text, input.Exclusions) {
|
||
continue
|
||
}
|
||
if !PassesPlacementFilter(post.Text) {
|
||
continue
|
||
}
|
||
key := post.Permalink
|
||
if key == "" {
|
||
continue
|
||
}
|
||
existing, ok := merged[key]
|
||
if !ok {
|
||
priority := "relevant"
|
||
if tq.Dimension == QueryRecency {
|
||
priority = "recent"
|
||
}
|
||
extID := post.ExternalID
|
||
if extID == "" {
|
||
if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok {
|
||
extID = parsed.ExternalID
|
||
}
|
||
}
|
||
merged[key] = &ScanCandidate{
|
||
Permalink: post.Permalink,
|
||
ExternalID: extID,
|
||
Author: post.Author,
|
||
Text: post.Text,
|
||
SearchTag: tq.Tag,
|
||
QueryDimension: tq.Dimension,
|
||
GraphNodeID: tq.GraphNodeID,
|
||
ProductFitScore: tq.ProductFitScore,
|
||
Source: channel,
|
||
HasRelevance: tq.Dimension == QueryRelevance,
|
||
HasRecency: tq.Dimension == QueryRecency,
|
||
Priority: priority,
|
||
PlacementScore: computePlacementScore(post.Text, tq.ProductFitScore, tq.Dimension == QueryRecency),
|
||
SolvedByProduct: tq.ProductFitScore >= 55,
|
||
}
|
||
order = append(order, key)
|
||
continue
|
||
}
|
||
if tq.Dimension == QueryRelevance {
|
||
existing.HasRelevance = true
|
||
}
|
||
if tq.Dimension == QueryRecency {
|
||
existing.HasRecency = true
|
||
}
|
||
if tq.ProductFitScore > existing.ProductFitScore {
|
||
existing.ProductFitScore = tq.ProductFitScore
|
||
existing.SolvedByProduct = tq.ProductFitScore >= 55
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
total := len(queries)
|
||
for i, tq := range queries {
|
||
if onProgress != nil {
|
||
pct := 10 + ((i + 1) * 75 / max(total, 1))
|
||
onProgress(fmt.Sprintf("雙軌海巡 %d/%d:%s", i+1, total, tq.Tag), pct)
|
||
}
|
||
limit := relevanceLimitPerTag
|
||
if tq.Dimension == QueryRecency {
|
||
limit = recencyLimitPerTag
|
||
}
|
||
if err := runQuery(tq, limit); err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
|
||
out := make([]ScanCandidate, 0, len(order))
|
||
for _, key := range order {
|
||
item := merged[key]
|
||
if item.HasRelevance && item.HasRecency && item.ProductFitScore >= 45 {
|
||
item.Priority = "gold"
|
||
} else if item.HasRecency {
|
||
item.Priority = "recent"
|
||
} else {
|
||
item.Priority = "relevant"
|
||
}
|
||
if item.ProductFitScore < 30 && item.Priority != "gold" {
|
||
continue
|
||
}
|
||
item.PlacementScore = computePlacementScore(item.Text, item.ProductFitScore, item.HasRecency)
|
||
item.SolvedByProduct = item.ProductFitScore >= 55
|
||
out = append(out, *item)
|
||
}
|
||
if onProgress != nil {
|
||
onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90)
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) {
|
||
req := DiscoverRequest{
|
||
Query: tq.Query,
|
||
Keyword: tq.Tag,
|
||
Recency: tq.Dimension == QueryRecency,
|
||
Limit: limit,
|
||
Member: input.Member,
|
||
Crawler: input.Crawler,
|
||
}
|
||
posts, channel, err := Discover(ctx, req)
|
||
if err == nil && len(posts) > 0 {
|
||
return posts, channel, nil
|
||
}
|
||
if input.Client == nil || !input.Client.Enabled() {
|
||
if err != nil {
|
||
return nil, "", err
|
||
}
|
||
return nil, "", fmt.Errorf("Brave 未設定且 Threads API 無結果")
|
||
}
|
||
bravePosts, berr := discoverViaBrave(ctx, input.Client, input.Member, tq.Query, limit)
|
||
if berr != nil {
|
||
if err != nil {
|
||
return nil, "", err
|
||
}
|
||
return nil, "", berr
|
||
}
|
||
return bravePosts, DiscoverBrave, nil
|
||
}
|
||
|
||
func discoverViaBrave(ctx context.Context, client *libbrave.Client, member MemberContext, query string, limit int) ([]DiscoverPost, error) {
|
||
res, err := client.Search(ctx, libbrave.SearchOptions{
|
||
Query: query,
|
||
Limit: limit,
|
||
Mode: libbrave.ModeThreadsDiscover,
|
||
Country: member.BraveCountry,
|
||
SearchLang: member.BraveSearchLang,
|
||
})
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if res.Status != "success" || len(res.Results) == 0 {
|
||
return nil, nil
|
||
}
|
||
out := make([]DiscoverPost, 0, len(res.Results))
|
||
for _, item := range res.Results {
|
||
parsed, ok := ParseThreadsPostFromWebResult(item.Title, item.Snippet, item.URL)
|
||
if !ok {
|
||
continue
|
||
}
|
||
out = append(out, DiscoverPost{
|
||
Text: parsed.Text,
|
||
Permalink: parsed.Permalink,
|
||
ExternalID: parsed.ExternalID,
|
||
Author: parsed.Author,
|
||
Source: DiscoverBrave,
|
||
})
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
func computePlacementScore(text string, productFit int, recent bool) int {
|
||
score := 30 + productFit/4
|
||
if HasPlacementIntent(text) {
|
||
score += 20
|
||
}
|
||
if LooksLikeRecommendationPost(text) {
|
||
score += 12
|
||
}
|
||
if recent {
|
||
score += 15
|
||
}
|
||
if productFit >= 60 {
|
||
score += 8
|
||
}
|
||
if score > 100 {
|
||
return 100
|
||
}
|
||
return score
|
||
}
|
||
|
||
// max returns the larger of a and b, or b when they are equal.
//
// This package-level declaration shadows the predeclared max builtin that
// exists since Go 1.21; it is kept so the package also builds with older
// toolchains.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
|