360 lines
9.2 KiB
Go
360 lines
9.2 KiB
Go
package placement
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"strings"
|
||
"time"
|
||
|
||
libbrave "haixun-backend/internal/library/brave"
|
||
libkg "haixun-backend/internal/library/knowledge"
|
||
)
|
||
|
||
// Per-query result limits used by RunDualTrackDiscover when it calls
// discoverForQuery.
const (
	// relevanceLimitPerTag is the limit for relevance-dimension queries.
	relevanceLimitPerTag = 12
	// recencyLimitPerTag is the limit for recency-dimension queries.
	recencyLimitPerTag = 8
)
|
||
|
||
type ScanCandidate struct {
|
||
Permalink string
|
||
ExternalID string
|
||
Author string
|
||
Text string
|
||
SearchTag string
|
||
QueryDimension QueryDimension
|
||
GraphNodeID string
|
||
ProductFitScore int
|
||
Source DiscoverChannel
|
||
HasRelevance bool
|
||
HasRecency bool
|
||
Priority string
|
||
LikeCount int
|
||
ReplyCount int
|
||
EngagementScore int
|
||
PlacementScore int
|
||
SolvedByProduct bool
|
||
PostedAt string
|
||
Replies []ReplyCandidate
|
||
}
|
||
|
||
type DualTrackInput struct {
|
||
Nodes []libkg.Node
|
||
PatrolKeywords []string
|
||
Exclusions []string
|
||
Member MemberContext
|
||
Client *libbrave.Client
|
||
Crawler CrawlerSearchFn
|
||
Limit int // max queries budget; 0 = default
|
||
OnCheckpoint func(candidates []ScanCandidate) error
|
||
}
|
||
|
||
// DualTrackProgress receives a human-readable status message and a progress
// percentage. RunDualTrackDiscover calls it before each query with
// 10 + (i+1)*75/total and once more with 90 after merging.
type DualTrackProgress func(message string, pct int)
|
||
|
||
// CollectTagQueries builds crawl jobs from selected graph nodes.
|
||
func CollectTagQueries(nodes []libkg.Node) []TagQuery {
|
||
out := make([]TagQuery, 0, len(nodes)*4)
|
||
for _, node := range nodes {
|
||
if !node.SelectedForScan {
|
||
continue
|
||
}
|
||
fit := node.ProductFitScore
|
||
derived := node.DerivedTags
|
||
if len(derived.Relevance) == 0 && len(derived.Recency) == 0 {
|
||
derived = libkg.DerivePatrolTagsForNode(node, libkg.PatrolTagInput{})
|
||
}
|
||
for _, tag := range derived.Relevance {
|
||
tag = strings.TrimSpace(tag)
|
||
if tag == "" {
|
||
continue
|
||
}
|
||
q := BuildRelevanceQuery(tag)
|
||
if q == "" {
|
||
continue
|
||
}
|
||
out = append(out, TagQuery{
|
||
Tag: tag,
|
||
Query: q,
|
||
Dimension: QueryRelevance,
|
||
GraphNodeID: node.ID,
|
||
ProductFitScore: fit,
|
||
})
|
||
}
|
||
for _, tag := range derived.Recency {
|
||
tag = strings.TrimSpace(tag)
|
||
if tag == "" {
|
||
continue
|
||
}
|
||
q7 := BuildRecencyQuery(tag, IdealMaxPostAgeDays)
|
||
if q7 != "" {
|
||
out = append(out, TagQuery{
|
||
Tag: tag,
|
||
Query: q7,
|
||
Dimension: QueryRecency,
|
||
GraphNodeID: node.ID,
|
||
ProductFitScore: fit,
|
||
RecencyDays: IdealMaxPostAgeDays,
|
||
})
|
||
}
|
||
q30 := BuildRecencyQuery(tag, MaxPostAgeDays)
|
||
if q30 != "" && q30 != q7 {
|
||
out = append(out, TagQuery{
|
||
Tag: tag,
|
||
Query: q30,
|
||
Dimension: QueryRecency,
|
||
GraphNodeID: node.ID,
|
||
ProductFitScore: fit,
|
||
RecencyDays: MaxPostAgeDays,
|
||
})
|
||
}
|
||
}
|
||
}
|
||
return out
|
||
}
|
||
|
||
// RunDualTrackDiscover executes relevance + recency queries and merges by permalink.
|
||
func RunDualTrackDiscover(ctx context.Context, input DualTrackInput, onProgress DualTrackProgress) ([]ScanCandidate, error) {
|
||
queries := ResolveTagQueries(input.Nodes, input.PatrolKeywords)
|
||
if len(queries) == 0 {
|
||
if len(input.PatrolKeywords) > 0 {
|
||
return nil, fmt.Errorf("海巡關鍵字格式無效,請改用 2~8 字的真人搜尋短句")
|
||
}
|
||
selected := 0
|
||
for _, node := range input.Nodes {
|
||
if node.SelectedForScan {
|
||
selected++
|
||
}
|
||
}
|
||
if selected > 0 {
|
||
return nil, fmt.Errorf("已勾選節點但沒有可用的海巡 tag,請重新擴展圖譜或手動編輯 tag")
|
||
}
|
||
return nil, fmt.Errorf("請先勾選要海巡的節點並儲存")
|
||
}
|
||
|
||
merged := map[string]*ScanCandidate{}
|
||
order := make([]string, 0, 64)
|
||
|
||
runQuery := func(tq TagQuery, limit int) error {
|
||
posts, channel, err := discoverForQuery(ctx, input, tq, limit)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
for _, post := range posts {
|
||
if MatchesExclusion(post.Text, input.Exclusions) {
|
||
continue
|
||
}
|
||
if !PassesPlacementFilter(post.Text) {
|
||
continue
|
||
}
|
||
key := post.Permalink
|
||
if key == "" {
|
||
continue
|
||
}
|
||
existing, ok := merged[key]
|
||
if !ok {
|
||
priority := "relevant"
|
||
if tq.Dimension == QueryRecency {
|
||
priority = "recent"
|
||
}
|
||
extID := post.ExternalID
|
||
if extID == "" {
|
||
if parsed, ok := ParseThreadsPostFromWebResult(post.Text, "", post.Permalink); ok {
|
||
extID = parsed.ExternalID
|
||
}
|
||
}
|
||
merged[key] = &ScanCandidate{
|
||
Permalink: post.Permalink,
|
||
ExternalID: extID,
|
||
Author: post.Author,
|
||
Text: post.Text,
|
||
SearchTag: tq.Tag,
|
||
QueryDimension: tq.Dimension,
|
||
GraphNodeID: tq.GraphNodeID,
|
||
ProductFitScore: tq.ProductFitScore,
|
||
Source: channel,
|
||
HasRelevance: tq.Dimension == QueryRelevance,
|
||
HasRecency: tq.Dimension == QueryRecency,
|
||
Priority: priority,
|
||
PlacementScore: computePlacementScore(post.Text, tq.ProductFitScore, tq.Dimension == QueryRecency),
|
||
SolvedByProduct: tq.ProductFitScore >= 55,
|
||
PostedAt: strings.TrimSpace(post.PostedAt),
|
||
}
|
||
order = append(order, key)
|
||
continue
|
||
}
|
||
if tq.Dimension == QueryRelevance {
|
||
existing.HasRelevance = true
|
||
}
|
||
if tq.Dimension == QueryRecency {
|
||
existing.HasRecency = true
|
||
}
|
||
if tq.ProductFitScore > existing.ProductFitScore {
|
||
existing.ProductFitScore = tq.ProductFitScore
|
||
existing.SolvedByProduct = tq.ProductFitScore >= 55
|
||
}
|
||
if strings.TrimSpace(existing.PostedAt) == "" && strings.TrimSpace(post.PostedAt) != "" {
|
||
existing.PostedAt = strings.TrimSpace(post.PostedAt)
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
total := len(queries)
|
||
for i, tq := range queries {
|
||
if onProgress != nil {
|
||
pct := 10 + ((i + 1) * 75 / max(total, 1))
|
||
onProgress(fmt.Sprintf("雙軌海巡 %d/%d:%s", i+1, total, tq.Tag), pct)
|
||
}
|
||
limit := relevanceLimitPerTag
|
||
if tq.Dimension == QueryRecency {
|
||
limit = recencyLimitPerTag
|
||
}
|
||
if err := runQuery(tq, limit); err != nil {
|
||
return nil, err
|
||
}
|
||
if input.OnCheckpoint != nil {
|
||
snapshot := snapshotMergedCandidates(merged, order, false)
|
||
if err := input.OnCheckpoint(snapshot); err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
if input.Member.AllowsCrawler && input.Member.DevMode && i < total-1 {
|
||
if err := politeDiscoverPause(ctx); err != nil {
|
||
return nil, err
|
||
}
|
||
}
|
||
}
|
||
|
||
out := snapshotMergedCandidates(merged, order, true)
|
||
if onProgress != nil {
|
||
onProgress(fmt.Sprintf("合併完成,共 %d 篇候選貼文", len(out)), 90)
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
func discoverForQuery(ctx context.Context, input DualTrackInput, tq TagQuery, limit int) ([]DiscoverPost, DiscoverChannel, error) {
|
||
req := DiscoverRequest{
|
||
Query: tq.Query,
|
||
Keyword: tq.Tag,
|
||
Recency: tq.Dimension == QueryRecency,
|
||
Limit: limit,
|
||
Member: input.Member,
|
||
Crawler: input.Crawler,
|
||
}
|
||
posts, channel, err := Discover(ctx, req)
|
||
if err == nil && len(posts) > 0 {
|
||
return posts, channel, nil
|
||
}
|
||
if input.Client == nil || !input.Client.Enabled() {
|
||
if err != nil {
|
||
return nil, "", err
|
||
}
|
||
return nil, "", fmt.Errorf("Brave 未設定且 Threads API 無結果")
|
||
}
|
||
bravePosts, berr := discoverViaBrave(ctx, input.Client, input.Member, tq.Query, limit)
|
||
if berr != nil {
|
||
if err != nil {
|
||
return nil, "", err
|
||
}
|
||
return nil, "", berr
|
||
}
|
||
return bravePosts, DiscoverBrave, nil
|
||
}
|
||
|
||
func discoverViaBrave(ctx context.Context, client *libbrave.Client, member MemberContext, query string, limit int) ([]DiscoverPost, error) {
|
||
res, err := client.Search(ctx, libbrave.SearchOptions{
|
||
Query: query,
|
||
Limit: limit,
|
||
Mode: libbrave.ModeThreadsDiscover,
|
||
Country: member.BraveCountry,
|
||
SearchLang: member.BraveSearchLang,
|
||
})
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if res.Status != "success" || len(res.Results) == 0 {
|
||
return nil, nil
|
||
}
|
||
out := make([]DiscoverPost, 0, len(res.Results))
|
||
for _, item := range res.Results {
|
||
parsed, ok := ParseThreadsPostFromWebResult(item.Title, item.Snippet, item.URL)
|
||
if !ok {
|
||
continue
|
||
}
|
||
out = append(out, DiscoverPost{
|
||
Text: parsed.Text,
|
||
Permalink: parsed.Permalink,
|
||
ExternalID: parsed.ExternalID,
|
||
Author: parsed.Author,
|
||
Source: DiscoverBrave,
|
||
})
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
func snapshotMergedCandidates(merged map[string]*ScanCandidate, order []string, applyFinalFilter bool) []ScanCandidate {
|
||
out := make([]ScanCandidate, 0, len(order))
|
||
for _, key := range order {
|
||
item := merged[key]
|
||
finalizeScanCandidate(item)
|
||
if applyFinalFilter && item.ProductFitScore < 30 && item.Priority != "gold" {
|
||
continue
|
||
}
|
||
out = append(out, *item)
|
||
}
|
||
return out
|
||
}
|
||
|
||
func finalizeScanCandidate(item *ScanCandidate) {
|
||
if item == nil {
|
||
return
|
||
}
|
||
if item.HasRelevance && item.HasRecency && item.ProductFitScore >= 45 {
|
||
item.Priority = "gold"
|
||
} else if item.HasRecency {
|
||
item.Priority = "recent"
|
||
} else {
|
||
item.Priority = "relevant"
|
||
}
|
||
item.PlacementScore = computePlacementScore(item.Text, item.ProductFitScore, item.HasRecency)
|
||
item.SolvedByProduct = item.ProductFitScore >= 55
|
||
}
|
||
|
||
func computePlacementScore(text string, productFit int, recent bool) int {
|
||
score := 30 + productFit/4
|
||
if HasPlacementIntent(text) {
|
||
score += 20
|
||
}
|
||
if LooksLikeRecommendationPost(text) {
|
||
score += 12
|
||
}
|
||
if recent {
|
||
score += 15
|
||
}
|
||
if productFit >= 60 {
|
||
score += 8
|
||
}
|
||
if score > 100 {
|
||
return 100
|
||
}
|
||
return score
|
||
}
|
||
|
||
// max returns the larger of a and b.
//
// This package-level definition shadows the max builtin that Go 1.21
// introduced; it is kept so the package builds the same way on any toolchain.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}
|
||
|
||
func politeDiscoverPause(ctx context.Context) error {
|
||
wait := 2*time.Second + jitterDuration(2*time.Second)
|
||
timer := time.NewTimer(wait)
|
||
defer timer.Stop()
|
||
select {
|
||
case <-ctx.Done():
|
||
return ctx.Err()
|
||
case <-timer.C:
|
||
return nil
|
||
}
|
||
}
|