haixunMaster/haixun-backend/internal/library/viral/discover.go

234 lines
6.0 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package viral
import (
"context"
"fmt"
"strings"
"haixun-backend/internal/library/placement"
)
// Tuning knobs for RunDiscover. The "mission" variants are the leaner
// defaults selected when DiscoverInput.MissionScan is set.
const (
// defaultLimitPerKeyword is the per-keyword search limit used when
// DiscoverInput.Limit is <= 0 and MissionScan is false.
defaultLimitPerKeyword = 15
// missionLimitPerKeyword is the per-keyword search limit used when
// DiscoverInput.Limit is <= 0 and MissionScan is true.
missionLimitPerKeyword = 10
// maxKeywords caps how many distinct keywords normalizeKeywords returns.
maxKeywords = 6
// maxMergedPosts is the total result cap used when DiscoverInput.MaxMerged
// is <= 0 and MissionScan is false.
maxMergedPosts = 60
// missionMaxMergedPosts is the total result cap used when
// DiscoverInput.MaxMerged is <= 0 and MissionScan is true.
missionMaxMergedPosts = 40
// missionQualityTarget is the number of mission-quality candidates after
// which a mission scan stops searching the remaining keywords.
missionQualityTarget = 12 // stop scanning extra keywords once enough quality posts
)
// DiscoverInput bundles the parameters of a RunDiscover scan.
type DiscoverInput struct {
// Keywords are the raw search tags; RunDiscover normalizes and de-duplicates
// them with normalizeKeywords before searching.
Keywords []string
// Exclusions is handed unchanged to the candidate filters
// (PassesViralCandidate / PassesMissionQualityCandidate).
Exclusions []string
// Member is forwarded to placement.Discover and supplies the path label
// shown in progress messages.
Member placement.MemberContext
// Crawler is forwarded to placement.Discover.
Crawler placement.CrawlerSearchFn
Limit int // per keyword; 0 = default
MaxMerged int // total cap; 0 = default
MissionScan bool // leaner defaults to save search API quota
}
// ProgressFn receives a status message and a completion percentage.
// RunDiscover reports values from 10 up to 85 and skips reporting entirely
// when the callback is nil.
type ProgressFn func(message string, pct int)
// RunDiscover searches Threads for viral candidates across keywords, ranked by engagement.
//
// Keywords are normalized and de-duplicated first; an error is returned when
// none remain. Each keyword is searched through placement.Discover, and the
// returned posts are keyed by permalink (falling back to external ID), scored
// with ScorePost and filtered:
//
//   - normal scans keep posts accepted by PassesViralCandidate;
//   - mission scans keep posts accepted by PassesMissionQualityCandidate and
//     remember posts that only pass PassesViralCandidate as a fallback, which
//     is used only when no post met the mission-quality bar.
//
// A failed keyword search is reported through progress and skipped. An error
// is returned only when no search succeeded and nothing was collected; it
// wraps the last search error. Once ctx is done the loop stops issuing
// further searches and the candidates collected so far are returned.
//
// The result is ordered by EngagementScore, highest first, and truncated to
// input.MaxMerged (or the default cap). progress may be nil; ctx must not be.
func RunDiscover(ctx context.Context, input DiscoverInput, progress ProgressFn) ([]placement.ScanCandidate, error) {
	keywords := normalizeKeywords(input.Keywords)
	if len(keywords) == 0 {
		return nil, fmt.Errorf("請提供至少一個爆款掃描關鍵字")
	}

	// Resolve the per-keyword limit and overall cap; non-positive inputs
	// select the defaults, with leaner values for mission scans.
	perKeyword := input.Limit
	if perKeyword <= 0 {
		perKeyword = defaultLimitPerKeyword
		if input.MissionScan {
			perKeyword = missionLimitPerKeyword
		}
	}
	maxMerged := input.MaxMerged
	if maxMerged <= 0 {
		maxMerged = maxMergedPosts
		if input.MissionScan {
			maxMerged = missionMaxMergedPosts
		}
	}

	// report forwards a progress update only when the caller supplied a callback.
	report := func(message string, pct int) {
		if progress != nil {
			progress(message, pct)
		}
	}

	merged := map[string]placement.ScanCandidate{}
	relaxed := map[string]placement.ScanCandidate{} // mission-scan fallback pool
	total := len(keywords)                          // >= 1, guaranteed by the check above
	pathLabel := input.Member.DiscoverPathLabel()
	var lastErr error
	keywordsSucceeded := 0

	for i, keyword := range keywords {
		// Stop spending search quota once the caller has cancelled or the
		// deadline has passed; whatever was merged so far is still returned.
		if err := ctx.Err(); err != nil {
			lastErr = err
			break
		}

		// Keyword scanning occupies the 10%..80% band of the progress range.
		pct := 10 + (i*70)/total

		if input.MissionScan && countMissionQuality(merged) >= missionQualityTarget {
			report(fmt.Sprintf("已收足 %d 篇品質候選,略過剩餘標籤以節省搜尋次數", missionQualityTarget), pct)
			break
		}
		report(fmt.Sprintf("掃描「%s」%s…", keyword, pathLabel), pct)

		// After the first mission hits, later keywords fetch at most 8 posts.
		limit := perKeyword
		if input.MissionScan && len(merged) > 0 {
			limit = min(perKeyword, 8)
		}

		posts, _, err := placement.Discover(ctx, placement.DiscoverRequest{
			Query:   keyword,
			Keyword: keyword,
			Limit:   limit,
			Member:  input.Member,
			Crawler: input.Crawler,
		})
		if err != nil {
			// Best effort: remember the failure and move on to the next keyword.
			lastErr = err
			report(fmt.Sprintf("「%s」搜尋略過%s", keyword, shortenDiscoverErr(err)), pct)
			continue
		}
		keywordsSucceeded++

		for _, post := range posts {
			// De-duplicate by permalink, falling back to the external ID.
			key := strings.TrimSpace(post.Permalink)
			if key == "" {
				key = strings.TrimSpace(post.ExternalID)
			}
			if key == "" {
				continue
			}

			score := ScorePost(post.LikeCount, post.ReplyCount)
			candidate := placement.ScanCandidate{
				Permalink:       post.Permalink,
				ExternalID:      post.ExternalID,
				Author:          post.Author,
				AuthorVerified:  post.AuthorVerified,
				FollowerCount:   post.FollowerCount,
				Text:            post.Text,
				SearchTag:       keyword,
				Source:          post.Source,
				LikeCount:       post.LikeCount,
				ReplyCount:      post.ReplyCount,
				EngagementScore: score,
				PlacementScore:  score,
				Priority:        PriorityLabel(score),
			}

			if input.MissionScan {
				if PassesMissionQualityCandidate(
					post.Text, post.LikeCount, post.ReplyCount, score,
					post.AuthorVerified, post.FollowerCount, input.Exclusions,
				) {
					mergeCandidate(merged, key, candidate)
					continue
				}
				if PassesViralCandidate(post.Text, post.LikeCount, post.ReplyCount, score, input.Exclusions) {
					mergeCandidate(relaxed, key, candidate)
				}
				continue
			}

			if !PassesViralCandidate(post.Text, post.LikeCount, post.ReplyCount, score, input.Exclusions) {
				continue
			}
			mergeCandidate(merged, key, candidate)
		}
	}

	// Mission scans fall back to the relaxed pool only when nothing met the
	// stricter mission-quality filter.
	if input.MissionScan && len(merged) == 0 && len(relaxed) > 0 {
		merged = relaxed
		report("未取得藍勾等延伸資料,改以互動門檻收斂爆款候選", 82)
	}

	out := candidatesFromMap(merged)
	sortByEngagement(out)
	if len(out) > maxMerged {
		out = out[:maxMerged]
	}

	// Only surface an error when no search succeeded at all; an empty result
	// from successful searches is a valid outcome.
	if len(out) == 0 && keywordsSucceeded == 0 && lastErr != nil {
		return nil, fmt.Errorf("所有標籤搜尋均失敗:%w", lastErr)
	}

	report(fmt.Sprintf("合併 %d 篇爆款候選", len(out)), 85)
	return out, nil
}
// mergeCandidate stores candidate under key. When the key is already present,
// the entry with the higher EngagementScore is passed as the first argument to
// MergeAuthorSignals and the other as the second; on a tie the existing entry
// goes first.
func mergeCandidate(merged map[string]placement.ScanCandidate, key string, candidate placement.ScanCandidate) {
	existing, found := merged[key]
	switch {
	case !found:
		merged[key] = candidate
	case candidate.EngagementScore > existing.EngagementScore:
		merged[key] = MergeAuthorSignals(candidate, existing)
	default:
		merged[key] = MergeAuthorSignals(existing, candidate)
	}
}
// candidatesFromMap copies the map's values into a new slice. The order
// follows map iteration and is therefore unspecified.
func candidatesFromMap(merged map[string]placement.ScanCandidate) []placement.ScanCandidate {
	values := make([]placement.ScanCandidate, len(merged))
	next := 0
	for _, candidate := range merged {
		values[next] = candidate
		next++
	}
	return values
}
// shortenDiscoverErr returns err's whitespace-trimmed message, shortened to at
// most 80 bytes plus a trailing "…" when it is longer. The cut is placed on a
// rune boundary so multi-byte text (e.g. Chinese messages) is never split into
// invalid UTF-8. A nil err yields the empty string.
func shortenDiscoverErr(err error) string {
	if err == nil {
		return ""
	}
	const maxBytes = 80
	msg := strings.TrimSpace(err.Error())
	if len(msg) <= maxBytes {
		return msg
	}
	// Ranging over a string yields the byte offset where each rune starts;
	// keep the largest offset that still fits within the byte budget.
	cut := 0
	for offset := range msg {
		if offset > maxBytes {
			break
		}
		cut = offset
	}
	return msg[:cut] + "…"
}
// normalizeKeywords maps each raw tag through DiscoverKeywordFromTag, drops
// empty results and repeats, and keeps first-seen order. Collection stops as
// soon as maxKeywords entries have been gathered.
func normalizeKeywords(raw []string) []string {
	unique := make([]string, 0, len(raw))
	visited := make(map[string]struct{})
	for _, tag := range raw {
		keyword := DiscoverKeywordFromTag(tag)
		if keyword == "" {
			continue
		}
		if _, dup := visited[keyword]; dup {
			continue
		}
		visited[keyword] = struct{}{}
		unique = append(unique, keyword)
		if len(unique) >= maxKeywords {
			break
		}
	}
	return unique
}
// countMissionQuality reports how many entries of merged are accepted by
// PassesMissionQualityCandidate. The check is made with a nil exclusion list.
func countMissionQuality(merged map[string]placement.ScanCandidate) int {
	var qualified int
	for _, candidate := range merged {
		accepted := PassesMissionQualityCandidate(
			candidate.Text, candidate.LikeCount, candidate.ReplyCount, candidate.EngagementScore,
			candidate.AuthorVerified, candidate.FollowerCount, nil,
		)
		if !accepted {
			continue
		}
		qualified++
	}
	return qualified
}
// min returns the smaller of a and b.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
// sortByEngagement reorders items in place so EngagementScore is descending.
// It is a simple exchange sort: each position is compared against every later
// one and swapped whenever the later score is strictly higher. Entries with
// equal scores are not guaranteed to keep their relative order.
func sortByEngagement(items []placement.ScanCandidate) {
	for head := range items {
		for probe := head + 1; probe < len(items); probe++ {
			if items[head].EngagementScore < items[probe].EngagementScore {
				items[head], items[probe] = items[probe], items[head]
			}
		}
	}
}