301 lines
7.5 KiB
Go
301 lines
7.5 KiB
Go
|
|
package placement
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"context"
|
|||
|
|
"fmt"
|
|||
|
|
"strings"
|
|||
|
|
|
|||
|
|
libbrave "haixun-backend/internal/library/brave"
|
|||
|
|
libkg "haixun-backend/internal/library/knowledge"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// Per-query result limits used by RunDualTrackDiscover.
const (
	// relevanceLimitPerTag is the result limit requested for each relevance query.
	relevanceLimitPerTag = 12

	// recencyLimitPerTag is the result limit requested for each recency query.
	recencyLimitPerTag = 8
)
|
|||
|
|
|
|||
|
|
type ScanCandidate struct {
|
|||
|
|
Permalink string
|
|||
|
|
ExternalID string
|
|||
|
|
Author string
|
|||
|
|
Text string
|
|||
|
|
SearchTag string
|
|||
|
|
QueryDimension QueryDimension
|
|||
|
|
GraphNodeID string
|
|||
|
|
ProductFitScore int
|
|||
|
|
Source DiscoverChannel
|
|||
|
|
HasRelevance bool
|
|||
|
|
HasRecency bool
|
|||
|
|
Priority string
|
|||
|
|
LikeCount int
|
|||
|
|
ReplyCount int
|
|||
|
|
EngagementScore int
|
|||
|
|
PlacementScore int
|
|||
|
|
SolvedByProduct bool
|
|||
|
|
Replies []ReplyCandidate
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
type DualTrackInput struct {
|
|||
|
|
Nodes []libkg.Node
|
|||
|
|
Exclusions []string
|
|||
|
|
Member MemberContext
|
|||
|
|
Client *libbrave.Client
|
|||
|
|
Crawler CrawlerSearchFn
|
|||
|
|
Limit int // max queries budget; 0 = default
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// DualTrackProgress receives progress updates from RunDualTrackDiscover: a
// human-readable status message and a completion percentage. While queries run
// the percentage lies between 10 and 85; 90 is reported once merging is done.
// RunDualTrackDiscover accepts a nil callback and then reports nothing.
type DualTrackProgress func(message string, pct int)
|
|||
|
|
|
|||
|
|
// CollectTagQueries builds crawl jobs from selected graph nodes.
|
|||
|
|
func CollectTagQueries(nodes []libkg.Node) []TagQuery {
|
|||
|
|
out := make([]TagQuery, 0, len(nodes)*4)
|
|||
|
|
for _, node := range nodes {
|
|||
|
|
if !node.SelectedForScan {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
fit := node.ProductFitScore
|
|||
|
|
for _, tag := range node.DerivedTags.Relevance {
|
|||
|
|
tag = strings.TrimSpace(tag)
|
|||
|
|
if tag == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
q := BuildRelevanceQuery(tag)
|
|||
|
|
if q == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
out = append(out, TagQuery{
|
|||
|
|
Tag: tag,
|
|||
|
|
Query: q,
|
|||
|
|
Dimension: QueryRelevance,
|
|||
|
|
GraphNodeID: node.ID,
|
|||
|
|
ProductFitScore: fit,
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
for _, tag := range node.DerivedTags.Recency {
|
|||
|
|
tag = strings.TrimSpace(tag)
|
|||
|
|
if tag == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
q7 := BuildRecencyQuery(tag, IdealMaxPostAgeDays)
|
|||
|
|
if q7 != "" {
|
|||
|
|
out = append(out, TagQuery{
|
|||
|
|
Tag: tag,
|
|||
|
|
Query: q7,
|
|||
|
|
Dimension: QueryRecency,
|
|||
|
|
GraphNodeID: node.ID,
|
|||
|
|
ProductFitScore: fit,
|
|||
|
|
RecencyDays: IdealMaxPostAgeDays,
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
q30 := BuildRecencyQuery(tag, MaxPostAgeDays)
|
|||
|
|
if q30 != "" && q30 != q7 {
|
|||
|
|
out = append(out, TagQuery{
|
|||
|
|
Tag: tag,
|
|||
|
|
Query: q30,
|
|||
|
|
Dimension: QueryRecency,
|
|||
|
|
GraphNodeID: node.ID,
|
|||
|
|
ProductFitScore: fit,
|
|||
|
|
RecencyDays: MaxPostAgeDays,
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return out
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// RunDualTrackDiscover executes relevance + recency queries and merges by permalink.
|
|||
|
|
func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) {
|
|||
|
|
queries := CollectTagQueries(input.Nodes)
|
|||
|
|
if len(queries) == 0 {
|
|||
|
|
return nil, fmt.Errorf("沒有勾選的節點或可用 tag")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
merged := map[string]*ScanCandidate{}
|
|||
|
|
order := make([]string, 0, 64)
|
|||
|
|
|
|||
|
|
runQuery := func(tq TagQuery, limit int) error {
|
|||
|
|
posts, channel, err := discoverForQuery(ctx, input, tq, limit)
|
|||
|
|
if err != nil {
|
|||
|
|
return err
|
|||
|
|
}
|
|||
|
|
for _, post := range posts {
|
|||
|
|
if MatchesExclusion(post.Text, input.Exclusions) {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
if !PassesPlacementFilter(post.Text) {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
key := post.Permalink
|
|||
|
|
if key == "" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
existing, ok := merged[key]
|
|||
|
|
if !ok {
|
|||
|
|
priority := "relevant"
|
|||
|
|
if tq.Dimension == QueryRecency {
|
|||
|
|
priority = "recent"
|
|||
|
|
}
|
|||
|
|
extID := post.ExternalID
|
|||
|
|
if extID == "" {
|
|||
|
|
if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok {
|
|||
|
|
extID = parsed.ExternalID
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
merged[key] = &ScanCandidate{
|
|||
|
|
Permalink: post.Permalink,
|
|||
|
|
ExternalID: extID,
|
|||
|
|
Author: post.Author,
|
|||
|
|
Text: post.Text,
|
|||
|
|
SearchTag: tq.Tag,
|
|||
|
|
QueryDimension: tq.Dimension,
|
|||
|
|
GraphNodeID: tq.GraphNodeID,
|
|||
|
|
ProductFitScore: tq.ProductFitScore,
|
|||
|
|
Source: channel,
|
|||
|
|
HasRelevance: tq.Dimension == QueryRelevance,
|
|||
|
|
HasRecency: tq.Dimension == QueryRecency,
|
|||
|
|
Priority: priority,
|
|||
|
|
PlacementScore: computePlacementScore(post.Text, tq.ProductFitScore, tq.Dimension == QueryRecency),
|
|||
|
|
SolvedByProduct: tq.ProductFitScore >= 55,
|
|||
|
|
}
|
|||
|
|
order = append(order, key)
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
if tq.Dimension == QueryRelevance {
|
|||
|
|
existing.HasRelevance = true
|
|||
|
|
}
|
|||
|
|
if tq.Dimension == QueryRecency {
|
|||
|
|
existing.HasRecency = true
|
|||
|
|
}
|
|||
|
|
if tq.ProductFitScore > existing.ProductFitScore {
|
|||
|
|
existing.ProductFitScore = tq.ProductFitScore
|
|||
|
|
existing.SolvedByProduct = tq.ProductFitScore >= 55
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
total := len(queries)
|
|||
|
|
for i, tq := range queries {
|
|||
|
|
if onProgress != nil {
|
|||
|
|
pct := 10 + ((i + 1) * 75 / max(total, 1))
|
|||
|
|
onProgress(fmt.Sprintf("雙軌海巡 %d/%d:%s", i+1, total, tq.Tag), pct)
|
|||
|
|
}
|
|||
|
|
limit := relevanceLimitPerTag
|
|||
|
|
if tq.Dimension == QueryRecency {
|
|||
|
|
limit = recencyLimitPerTag
|
|||
|
|
}
|
|||
|
|
if err := runQuery(tq, limit); err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
out := make([]ScanCandidate, 0, len(order))
|
|||
|
|
for _, key := range order {
|
|||
|
|
item := merged[key]
|
|||
|
|
if item.HasRelevance && item.HasRecency && item.ProductFitScore >= 45 {
|
|||
|
|
item.Priority = "gold"
|
|||
|
|
} else if item.HasRecency {
|
|||
|
|
item.Priority = "recent"
|
|||
|
|
} else {
|
|||
|
|
item.Priority = "relevant"
|
|||
|
|
}
|
|||
|
|
if item.ProductFitScore < 30 && item.Priority != "gold" {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
item.PlacementScore = computePlacementScore(item.Text, item.ProductFitScore, item.HasRecency)
|
|||
|
|
item.SolvedByProduct = item.ProductFitScore >= 55
|
|||
|
|
out = append(out, *item)
|
|||
|
|
}
|
|||
|
|
if onProgress != nil {
|
|||
|
|
onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90)
|
|||
|
|
}
|
|||
|
|
return out, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) {
|
|||
|
|
req := DiscoverRequest{
|
|||
|
|
Query: tq.Query,
|
|||
|
|
Keyword: tq.Tag,
|
|||
|
|
Recency: tq.Dimension == QueryRecency,
|
|||
|
|
Limit: limit,
|
|||
|
|
Member: input.Member,
|
|||
|
|
Crawler: input.Crawler,
|
|||
|
|
}
|
|||
|
|
posts, channel, err := Discover(ctx, req)
|
|||
|
|
if err == nil && len(posts) > 0 {
|
|||
|
|
return posts, channel, nil
|
|||
|
|
}
|
|||
|
|
if input.Client == nil || !input.Client.Enabled() {
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, "", err
|
|||
|
|
}
|
|||
|
|
return nil, "", fmt.Errorf("Brave 未設定且 Threads API 無結果")
|
|||
|
|
}
|
|||
|
|
bravePosts, berr := discoverViaBrave(ctx, input.Client, input.Member, tq.Query, limit)
|
|||
|
|
if berr != nil {
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, "", err
|
|||
|
|
}
|
|||
|
|
return nil, "", berr
|
|||
|
|
}
|
|||
|
|
return bravePosts, DiscoverBrave, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func discoverViaBrave(ctx context.Context, client *libbrave.Client, member MemberContext, query string, limit int) ([]DiscoverPost, error) {
|
|||
|
|
res, err := client.Search(ctx, libbrave.SearchOptions{
|
|||
|
|
Query: query,
|
|||
|
|
Limit: limit,
|
|||
|
|
Mode: libbrave.ModeThreadsDiscover,
|
|||
|
|
Country: member.BraveCountry,
|
|||
|
|
SearchLang: member.BraveSearchLang,
|
|||
|
|
})
|
|||
|
|
if err != nil {
|
|||
|
|
return nil, err
|
|||
|
|
}
|
|||
|
|
if res.Status != "success" || len(res.Results) == 0 {
|
|||
|
|
return nil, nil
|
|||
|
|
}
|
|||
|
|
out := make([]DiscoverPost, 0, len(res.Results))
|
|||
|
|
for _, item := range res.Results {
|
|||
|
|
parsed, ok := ParseThreadsPostFromWebResult(item.Title, item.Snippet, item.URL)
|
|||
|
|
if !ok {
|
|||
|
|
continue
|
|||
|
|
}
|
|||
|
|
out = append(out, DiscoverPost{
|
|||
|
|
Text: parsed.Text,
|
|||
|
|
Permalink: parsed.Permalink,
|
|||
|
|
ExternalID: parsed.ExternalID,
|
|||
|
|
Author: parsed.Author,
|
|||
|
|
Source: DiscoverBrave,
|
|||
|
|
})
|
|||
|
|
}
|
|||
|
|
return out, nil
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
func computePlacementScore(text string, productFit int, recent bool) int {
|
|||
|
|
score := 30 + productFit/4
|
|||
|
|
if HasPlacementIntent(text) {
|
|||
|
|
score += 20
|
|||
|
|
}
|
|||
|
|
if LooksLikeRecommendationPost(text) {
|
|||
|
|
score += 12
|
|||
|
|
}
|
|||
|
|
if recent {
|
|||
|
|
score += 15
|
|||
|
|
}
|
|||
|
|
if productFit >= 60 {
|
|||
|
|
score += 8
|
|||
|
|
}
|
|||
|
|
if score > 100 {
|
|||
|
|
return 100
|
|||
|
|
}
|
|||
|
|
return score
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// max returns the larger of a and b.
//
// This package-level helper takes precedence over the predeclared max that
// exists since Go 1.21, so calls in this package resolve to it.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|