package viral

import (
	"context"
	"regexp"
	"sort"
	"strings"

	"haixun-backend/internal/library/websearch"
)

const (
	// maxAccountDiscoverQueries caps how many search queries a single
	// discovery run sends to the web search client.
	maxAccountDiscoverQueries = 2
	// MaxSimilarAccounts caps how many accounts DiscoverSimilarAccounts returns.
	MaxSimilarAccounts = 5
)

const (
	// accountMentionWeight is the weight of a hit where the handle only
	// appears somewhere in the result's URL, title or snippet text.
	accountMentionWeight = 2
	// accountProfileURLWeight is the weight of a hit whose result URL itself
	// contains "/@<handle>".
	accountProfileURLWeight = 4
	// maxAccountReasonRunes is the maximum length, in runes, of a reason.
	maxAccountReasonRunes = 120
)

// threadsProfileRE captures the handle from a threads.com / threads.net
// profile link such as "threads.net/@some.user".
var threadsProfileRE = regexp.MustCompile(`(?i)threads\.(?:com|net)/@([a-zA-Z0-9._]+)`)

// reservedUsernames lists lower-cased path words that the profile pattern can
// capture but that isValidUsername never accepts as an account handle.
var reservedUsernames = map[string]struct{}{
	"login":     {},
	"signup":    {},
	"search":    {},
	"explore":   {},
	"home":      {},
	"help":      {},
	"about":     {},
	"privacy":   {},
	"terms":     {},
	"settings":  {},
	"threads":   {},
	"thread":    {},
	"instagram": {},
	"meta":      {},
	"www":       {},
}

// SimilarAccount is one discovered account as returned to callers.
//
// Reason is a short excerpt (snippet, title or a fallback sentence) explaining
// where the account was found, Source is "web" for every account produced in
// this file, Confidence is "high", "medium" or "low" (see accountConfidence),
// and ProfileURL is the threads.net profile link built from Username.
type SimilarAccount struct {
	Username   string `json:"username"`
	Reason     string `json:"reason"`
	Source     string `json:"source"`
	Confidence string `json:"confidence"`
	ProfileURL string `json:"profileUrl"`
}

// DiscoverAccountsInput carries the text used to build the search queries.
// SeedQuery is required; Brief and Pillars are optional refinements.
type DiscoverAccountsInput struct {
	SeedQuery string
	Brief     string
	Pillars   []string
}

// accountCandidate is the running tally for one handle during discovery.
//
// score always equals best plus one point for every hit after the first.
// best is the highest single-hit weight seen so far, and rank is the
// zero-based discovery order, used to break score ties deterministically.
type accountCandidate struct {
	username string
	score    int
	reason   string
	source   string
	best     int
	rank     int
}

// DiscoverSimilarAccounts searches the web for Threads profiles related to
// input.SeedQuery and returns at most MaxSimilarAccounts of them, strongest
// evidence first.
//
// It returns (nil, nil) when client is nil or disabled, or when the trimmed
// seed is empty. Individual searches that fail or report a non-"success"
// status are skipped (best effort), so the returned error is currently always
// nil. When searches ran but found nothing the result is an empty, non-nil
// slice.
func DiscoverSimilarAccounts(ctx context.Context, client websearch.Client, input DiscoverAccountsInput) ([]SimilarAccount, error) {
	if client == nil || !client.Enabled() {
		return nil, nil
	}
	seed := strings.TrimSpace(input.SeedQuery)
	if seed == "" {
		return nil, nil
	}
	queries := buildAccountDiscoverQueries(seed, input.Brief, input.Pillars)
	if len(queries) == 0 {
		return nil, nil
	}

	seen := map[string]accountCandidate{}
	for _, query := range queries {
		res, err := client.Search(ctx, websearch.SearchOptions{
			Query: query,
			Limit: 12,
			Mode:  websearch.ModeThreadsDiscover,
		})
		if err != nil || res.Status != "success" {
			// Deliberately best effort: one failed query must not discard
			// what the other queries found.
			continue
		}
		for _, item := range res.Results {
			blob := strings.TrimSpace(item.URL + " " + item.Title + " " + item.Snippet)
			usernames := extractUsernames(blob)
			if len(usernames) == 0 {
				continue
			}
			reason := accountHitReason(item.Snippet, item.Title, seed)
			for _, username := range usernames {
				recordAccountHit(seen, username, accountHitWeight(item.URL, username), reason)
			}
		}
	}

	ranked := rankAccountCandidates(seen)
	accounts := make([]SimilarAccount, 0, len(ranked))
	for _, c := range ranked {
		accounts = append(accounts, SimilarAccount{
			Username:   c.username,
			Reason:     c.reason,
			Source:     c.source,
			Confidence: accountConfidence(c.score),
			ProfileURL: "https://www.threads.net/@" + c.username,
		})
	}
	return accounts, nil
}

// accountHitReason picks the human-readable reason for a search hit: the
// snippet, else the title, else a fallback sentence naming the seed. The
// result is cut to maxAccountReasonRunes runes.
func accountHitReason(snippet, title, seed string) string {
	reason := strings.TrimSpace(snippet)
	if reason == "" {
		reason = strings.TrimSpace(title)
	}
	if reason == "" {
		reason = "在「" + seed + "」相關搜尋結果中找到"
	}
	// Truncate on rune boundaries so multi-byte text is never split mid-character.
	if runes := []rune(reason); len(runes) > maxAccountReasonRunes {
		reason = string(runes[:maxAccountReasonRunes])
	}
	return reason
}

// accountHitWeight returns accountProfileURLWeight when the result URL itself
// points at the handle and accountMentionWeight otherwise.
func accountHitWeight(rawURL, username string) int {
	if urlHasProfileHandle(rawURL, username) {
		return accountProfileURLWeight
	}
	return accountMentionWeight
}

// urlHasProfileHandle reports whether rawURL contains "/@<username>" as a
// complete handle, compared case-insensitively. Matching the whole handle
// (rather than a substring) keeps "foo" from matching a URL for "/@foobar".
func urlHasProfileHandle(rawURL, username string) bool {
	rest := rawURL
	for {
		at := strings.Index(rest, "/@")
		if at < 0 {
			return false
		}
		rest = rest[at+len("/@"):]
		end := 0
		for end < len(rest) && isUsernameRune(rune(rest[end])) {
			end++
		}
		// Trailing dots are trimmed the same way extractUsernames trims them.
		if strings.EqualFold(strings.TrimRight(rest[:end], "."), username) {
			return true
		}
		rest = rest[end:]
	}
}

// recordAccountHit folds one hit into the tally kept in seen, keyed by the
// lower-cased handle.
//
// The score is order-independent: it is the best single-hit weight plus one
// point per additional hit. A strictly stronger hit also replaces the stored
// reason and handle casing, because it is the better evidence.
func recordAccountHit(seen map[string]accountCandidate, username string, weight int, reason string) {
	key := strings.ToLower(username)
	prev, ok := seen[key]
	switch {
	case !ok:
		seen[key] = accountCandidate{
			username: username,
			score:    weight,
			reason:   reason,
			source:   "web",
			best:     weight,
			rank:     len(seen),
		}
	case weight > prev.best:
		// Swap the old best for the new one and count this hit as one more
		// mention, instead of discarding the mentions accumulated so far.
		prev.score += weight - prev.best + 1
		prev.best = weight
		prev.username = username
		prev.reason = reason
		seen[key] = prev
	default:
		prev.score++
		seen[key] = prev
	}
}

// rankAccountCandidates returns the candidates ordered by descending score,
// with ties broken by discovery order, cut to MaxSimilarAccounts.
func rankAccountCandidates(seen map[string]accountCandidate) []accountCandidate {
	ranked := make([]accountCandidate, 0, len(seen))
	for _, c := range seen {
		ranked = append(ranked, c)
	}
	// Map iteration order is random, so the comparison must be a total order
	// for the result (and the cut below) to be reproducible.
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].score != ranked[j].score {
			return ranked[i].score > ranked[j].score
		}
		return ranked[i].rank < ranked[j].rank
	})
	if len(ranked) > MaxSimilarAccounts {
		ranked = ranked[:MaxSimilarAccounts]
	}
	return ranked
}

// buildAccountDiscoverQueries builds the de-duplicated list of search queries
// for seed, limited to maxAccountDiscoverQueries entries.
//
// NOTE(review): the two seed queries are always distinct and non-empty, so
// with the cap at 2 the brief and pillar queries below are never returned.
// Raise maxAccountDiscoverQueries if they are meant to be sent.
func buildAccountDiscoverQueries(seed, brief string, pillars []string) []string {
	quoted := `"` + seed + `"`
	queries := []string{
		`site:threads.net ` + quoted,
		`threads ` + quoted + ` 創作者`,
	}

	// Only a short brief (4 to 24 runes) is usable as an extra search hint.
	hint := strings.TrimSpace(brief)
	if n := len([]rune(hint)); n >= 4 && n <= 24 {
		queries = append(queries, `site:threads.net `+quoted+` `+hint)
	}
	for _, pillar := range pillars {
		pillar = strings.TrimSpace(pillar)
		if len([]rune(pillar)) >= 4 && len(queries) < maxAccountDiscoverQueries+1 {
			queries = append(queries, `site:threads.net "`+pillar+`"`)
		}
	}

	unique := make([]string, 0, maxAccountDiscoverQueries)
	seen := make(map[string]struct{}, len(queries))
	for _, q := range queries {
		q = strings.TrimSpace(q)
		if q == "" {
			continue
		}
		if _, dup := seen[q]; dup {
			continue
		}
		seen[q] = struct{}{}
		unique = append(unique, q)
		if len(unique) >= maxAccountDiscoverQueries {
			break
		}
	}
	return unique
}

// extractUsernames returns the valid Threads handles linked in blob, in order
// of first appearance and de-duplicated case-insensitively. The result is
// never nil.
func extractUsernames(blob string) []string {
	matches := threadsProfileRE.FindAllStringSubmatch(blob, -1)
	out := make([]string, 0, len(matches))
	seen := make(map[string]struct{}, len(matches))
	for _, match := range matches {
		// The pattern allows '.', so a link that ends a sentence
		// ("threads.net/@foo.") captures the full stop as well; treat
		// trailing dots as punctuation rather than part of the handle.
		user := strings.TrimRight(match[1], ".")
		if !isValidUsername(user) {
			continue
		}
		key := strings.ToLower(user)
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, user)
	}
	return out
}

// isValidUsername reports whether username is 2 to 30 bytes long, is not a
// reserved word, and consists only of ASCII letters, digits, '.' and '_'.
func isValidUsername(username string) bool {
	if len(username) < 2 || len(username) > 30 {
		return false
	}
	if _, reserved := reservedUsernames[strings.ToLower(username)]; reserved {
		return false
	}
	for _, r := range username {
		if !isUsernameRune(r) {
			return false
		}
	}
	return true
}

// isUsernameRune reports whether r may appear in a handle: an ASCII letter or
// digit, '.' or '_'.
func isUsernameRune(r rune) bool {
	switch {
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
		return true
	case r == '.', r == '_':
		return true
	}
	return false
}

// accountConfidence maps a candidate score to a label: "high" for 5 and
// above, "medium" for 3 or 4, and "low" otherwise.
func accountConfidence(score int) string {
	switch {
	case score >= 5:
		return "high"
	case score >= 3:
		return "medium"
	default:
		return "low"
	}
}