thread-master/internal/library/viral/discover_accounts.go

206 lines
4.9 KiB
Go
Raw Normal View History

2026-06-26 08:37:04 +00:00
package viral
import (
"context"
"regexp"
"sort"
"strings"
"haixun-backend/internal/library/websearch"
)
// Limits applied while discovering similar accounts.
const (
// maxAccountDiscoverQueries is the number of unique queries at which
// buildAccountDiscoverQueries stops collecting; each returned query is sent
// to the search client once by DiscoverSimilarAccounts.
maxAccountDiscoverQueries = 2
// MaxSimilarAccounts is the largest number of accounts
// DiscoverSimilarAccounts returns.
MaxSimilarAccounts = 5
)
// threadsProfileRE matches, case-insensitively, a profile link of the form
// "threads.com/@name" or "threads.net/@name". Capture group 1 is the name: a
// run of ASCII letters, digits, '.' and '_'. Because '.' is part of that run,
// punctuation directly after a link is captured along with the name.
var threadsProfileRE = regexp.MustCompile(`(?i)threads\.(?:com|net)/@([a-zA-Z0-9._]+)`)
// reservedUsernames is the set of names isValidUsername refuses to treat as
// an account. Keys are lower case; lookups lower-case the candidate first.
// NOTE(review): these look like site routes and brand names rather than real
// profiles — confirm the list against the product.
var reservedUsernames = map[string]struct{}{
"login": {}, "signup": {}, "search": {}, "explore": {}, "home": {},
"help": {}, "about": {}, "privacy": {}, "terms": {}, "settings": {},
"threads": {}, "thread": {}, "instagram": {}, "meta": {}, "www": {},
}
// SimilarAccount is one account suggestion returned by
// DiscoverSimilarAccounts, serialised with the JSON keys given in the tags.
type SimilarAccount struct {
// Username is the handle as it appeared in the search result, without "@".
Username string `json:"username"`
// Reason is a short text (at most 120 runes) taken from the result's
// snippet or title, or a fixed fallback naming the seed query.
Reason string `json:"reason"`
// Source names where the candidate came from; this file only sets "web".
Source string `json:"source"`
// Confidence is "high", "medium" or "low", derived from the candidate's
// score by accountConfidence.
Confidence string `json:"confidence"`
// ProfileURL is "https://www.threads.net/@" followed by Username.
ProfileURL string `json:"profileUrl"`
}
// DiscoverAccountsInput carries the caller-supplied text used to build the
// search queries in DiscoverSimilarAccounts.
type DiscoverAccountsInput struct {
// SeedQuery is the main search phrase. It is trimmed; when it is empty,
// discovery returns no accounts.
SeedQuery string
// Brief is an optional hint. After trimming it contributes an extra query
// only when it is between 4 and 24 runes long.
Brief string
// Pillars are optional topics. A trimmed pillar of at least 4 runes may
// contribute an extra query, subject to maxAccountDiscoverQueries.
Pillars []string
}
// accountCandidate is the working record DiscoverSimilarAccounts keeps for
// each handle while it scans search results.
type accountCandidate struct {
// username is the handle with the casing it had in the search result.
username string
// score ranks the candidate; higher sorts first and maps to a stronger
// confidence label.
score int
// reason is the text later copied into SimilarAccount.Reason.
reason string
// source is the text later copied into SimilarAccount.Source.
source string
}
// DiscoverSimilarAccounts searches the web for Threads profiles related to
// input.SeedQuery and returns at most MaxSimilarAccounts of them, highest score
// first.
//
// The function is best-effort. A nil or disabled client, a blank seed, or an
// empty query list yields (nil, nil). A search that returns an error or a
// status other than "success" is skipped. The returned error is always nil.
//
// Scoring: each search result that links a handle is one hit for that handle.
// A hit whose result URL contains "/@<handle>" has weight 4; any other hit has
// weight 2. A candidate's score is the weight of its strongest hit plus one
// point for every additional hit, so the score (and the confidence label
// derived from it) does not depend on the order in which results arrive. The
// reason and the handle's casing come from the strongest hit, the earliest one
// when several share that weight.
//
// Candidates with equal scores keep the order in which they were first seen,
// which makes the output reproducible for a given set of search responses.
func DiscoverSimilarAccounts(ctx context.Context, client websearch.Client, input DiscoverAccountsInput) ([]SimilarAccount, error) {
	if client == nil || !client.Enabled() {
		return nil, nil
	}
	seed := strings.TrimSpace(input.SeedQuery)
	if seed == "" {
		return nil, nil
	}
	queries := buildAccountDiscoverQueries(seed, input.Brief, input.Pillars)
	if len(queries) == 0 {
		return nil, nil
	}

	// ranked keeps candidates in first-seen order (a map alone would lose it),
	// strongest[i] is the best single-hit weight of ranked[i], and slotOf maps
	// a lower-cased handle to its index in both slices.
	var (
		ranked    []accountCandidate
		strongest []int
	)
	slotOf := map[string]int{}

	for _, query := range queries {
		res, err := client.Search(ctx, websearch.SearchOptions{
			Query: query,
			Limit: 12,
			Mode:  websearch.ModeThreadsDiscover,
		})
		if err != nil || res.Status != "success" {
			// Best-effort: one failed query must not discard the others.
			continue
		}
		for _, item := range res.Results {
			blob := strings.TrimSpace(item.URL + " " + item.Title + " " + item.Snippet)
			usernames := extractUsernames(blob)
			if len(usernames) == 0 {
				continue
			}
			// Both values depend only on the result item, not on the handle.
			reason := similarAccountReason(item.Snippet, item.Title, seed)
			lowerURL := strings.ToLower(item.URL)

			for _, username := range usernames {
				key := strings.ToLower(username)
				weight := 2
				if strings.Contains(lowerURL, "/@"+key) {
					weight = 4
				}

				slot, known := slotOf[key]
				if !known {
					slotOf[key] = len(ranked)
					ranked = append(ranked, accountCandidate{
						username: username,
						score:    weight,
						reason:   reason,
						source:   "web",
					})
					strongest = append(strongest, weight)
					continue
				}

				cand := &ranked[slot]
				cand.score++ // every additional hit is worth one point
				if weight > strongest[slot] {
					// Raise the base to the stronger weight without dropping
					// the points already earned by earlier hits.
					cand.score += weight - strongest[slot]
					strongest[slot] = weight
					cand.username = username
					cand.reason = reason
				}
			}
		}
	}

	// Stable sort: ties stay in first-seen order.
	sort.SliceStable(ranked, func(i, j int) bool { return ranked[i].score > ranked[j].score })
	if len(ranked) > MaxSimilarAccounts {
		ranked = ranked[:MaxSimilarAccounts]
	}

	accounts := make([]SimilarAccount, 0, len(ranked))
	for _, cand := range ranked {
		accounts = append(accounts, SimilarAccount{
			Username:   cand.username,
			Reason:     cand.reason,
			Source:     cand.source,
			Confidence: accountConfidence(cand.score),
			ProfileURL: "https://www.threads.net/@" + cand.username,
		})
	}
	return accounts, nil
}

// similarAccountReason picks the text shown for a candidate: the trimmed
// snippet, otherwise the trimmed title, otherwise a fixed note naming the seed
// query. The result is cut to at most 120 runes.
func similarAccountReason(snippet, title, seed string) string {
	reason := strings.TrimSpace(snippet)
	if reason == "" {
		reason = strings.TrimSpace(title)
	}
	if reason == "" {
		reason = "在「" + seed + "」相關搜尋結果中找到"
	}
	// Truncate by rune so multi-byte characters are never split.
	if runes := []rune(reason); len(runes) > 120 {
		reason = string(runes[:120])
	}
	return reason
}
// buildAccountDiscoverQueries assembles the search queries for a seed phrase.
//
// Candidates are produced in this order: a site-restricted query for the
// quoted seed, a free-text "threads … 創作者" query for the quoted seed, a
// site-restricted query combining the seed with brief (only when the trimmed
// brief is 4–24 runes long), and site-restricted queries for trimmed pillars
// of at least 4 runes, added while the candidate list holds no more than
// maxAccountDiscoverQueries entries.
//
// The candidates are then trimmed, blank and duplicate entries are dropped,
// and collection stops once maxAccountDiscoverQueries unique queries have been
// gathered. While that constant is 2, the two seed queries already fill the
// quota, so the brief and pillar queries only take effect if it is raised.
func buildAccountDiscoverQueries(seed, brief string, pillars []string) []string {
	const sitePrefix = `site:threads.net `
	quotedSeed := `"` + seed + `"`

	candidates := []string{
		sitePrefix + quotedSeed,
		`threads ` + quotedSeed + ` 創作者`,
	}

	hint := strings.TrimSpace(brief)
	if n := len([]rune(hint)); n >= 4 && n <= 24 {
		candidates = append(candidates, sitePrefix+quotedSeed+` `+hint)
	}

	for _, raw := range pillars {
		// The list only grows, so once it exceeds the cap no later pillar
		// can be added either.
		if len(candidates) > maxAccountDiscoverQueries {
			break
		}
		topic := strings.TrimSpace(raw)
		if len([]rune(topic)) < 4 {
			continue
		}
		candidates = append(candidates, sitePrefix+`"`+topic+`"`)
	}

	picked := make([]string, 0, len(candidates))
	taken := make(map[string]struct{}, len(candidates))
	for _, candidate := range candidates {
		query := strings.TrimSpace(candidate)
		if query == "" {
			continue
		}
		if _, dup := taken[query]; dup {
			continue
		}
		taken[query] = struct{}{}
		picked = append(picked, query)
		if len(picked) >= maxAccountDiscoverQueries {
			break
		}
	}
	return picked
}
// extractUsernames returns the distinct handles linked in blob through
// threads.com/@… or threads.net/@… URLs, in order of first appearance.
//
// Handles are compared case-insensitively for de-duplication; the first
// spelling encountered is the one returned. Names rejected by isValidUsername
// are dropped. The result is never nil.
func extractUsernames(blob string) []string {
	matches := threadsProfileRE.FindAllStringSubmatch(blob, -1)
	out := make([]string, 0, len(matches))
	seen := make(map[string]struct{}, len(matches))
	for _, match := range matches {
		if len(match) < 2 {
			continue
		}
		// The pattern accepts '.', so a link that closes a sentence
		// ("… threads.net/@name.") captures the full stop as part of the
		// handle and would produce a broken profile URL. Strip it before
		// validating and de-duplicating.
		// NOTE(review): assumes a real handle never ends in '.', as is the
		// case for Instagram usernames — confirm for Threads.
		user := strings.TrimRight(strings.TrimSpace(match[1]), ".")
		if !isValidUsername(user) {
			continue
		}
		key := strings.ToLower(user)
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, user)
	}
	return out
}
// isValidUsername reports whether username is acceptable as an account
// handle: 2 to 30 bytes long, not listed in reservedUsernames (compared in
// lower case), and made up solely of ASCII letters, ASCII digits, '.' and '_'.
func isValidUsername(username string) bool {
	if n := len(username); n < 2 || n > 30 {
		return false
	}
	if _, reserved := reservedUsernames[strings.ToLower(username)]; reserved {
		return false
	}
	for _, r := range username {
		switch {
		case 'a' <= r && r <= 'z':
		case 'A' <= r && r <= 'Z':
		case '0' <= r && r <= '9':
		case r == '.', r == '_':
		default:
			return false
		}
	}
	return true
}
// accountConfidence converts a candidate score into a confidence label:
// "high" for 5 and above, "medium" for 3 or 4, and "low" for anything smaller.
func accountConfidence(score int) string {
	switch {
	case score >= 5:
		return "high"
	case score >= 3:
		return "medium"
	default:
		return "low"
	}
}