206 lines
4.9 KiB
Go
206 lines
4.9 KiB
Go
package viral
|
|
|
|
import (
|
|
"context"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
|
|
"haixun-backend/internal/library/websearch"
|
|
)
|
|
|
|
// Limits applied while discovering similar Threads accounts.
const (
	// maxAccountDiscoverQueries caps how many distinct search queries
	// buildAccountDiscoverQueries returns, and therefore how many searches
	// DiscoverSimilarAccounts issues per call.
	maxAccountDiscoverQueries = 2

	// MaxSimilarAccounts is the maximum number of accounts returned by
	// DiscoverSimilarAccounts.
	MaxSimilarAccounts = 5
)
|
|
|
|
// threadsProfileRE matches a Threads profile link on threads.com or
// threads.net (case-insensitively) and captures the handle that follows "/@".
// The handle class includes '.', so trailing sentence punctuation right after
// a link is captured as part of the handle.
var threadsProfileRE = regexp.MustCompile(`(?i)threads\.(?:com|net)/@([a-zA-Z0-9._]+)`)
|
|
|
|
// reservedUsernames is the set of handles that isValidUsername always rejects.
// Keys are lower-case; lookups lower-case the candidate first.
// NOTE(review): these look like site pages and brand names rather than creator
// accounts — confirm before extending the list.
var reservedUsernames = map[string]struct{}{
	"login":     {},
	"signup":    {},
	"search":    {},
	"explore":   {},
	"home":      {},
	"help":      {},
	"about":     {},
	"privacy":   {},
	"terms":     {},
	"settings":  {},
	"threads":   {},
	"thread":    {},
	"instagram": {},
	"meta":      {},
	"www":       {},
}
|
|
|
|
// SimilarAccount is one Threads account suggested by DiscoverSimilarAccounts.
type SimilarAccount struct {
	// Username is the account handle, without the leading "@".
	Username string `json:"username"`
	// Reason is a short text (at most 120 runes) explaining where the account
	// was found: a result snippet, a result title, or a generic fallback.
	Reason string `json:"reason"`
	// Source names where the candidate came from; discovery sets it to "web".
	Source string `json:"source"`
	// Confidence is "high", "medium" or "low", derived from the match score.
	Confidence string `json:"confidence"`
	// ProfileURL is the threads.net profile link built from Username.
	ProfileURL string `json:"profileUrl"`
}
|
|
|
|
// DiscoverAccountsInput carries the search hints for DiscoverSimilarAccounts.
type DiscoverAccountsInput struct {
	// SeedQuery is the main topic to search for. Discovery returns no results
	// when it is blank.
	SeedQuery string
	// Brief is an optional extra hint; it is only considered for a query when
	// it is between 4 and 24 runes long after trimming.
	Brief string
	// Pillars are optional topics; a pillar of at least 4 runes may contribute
	// its own site-restricted query.
	Pillars []string
}
|
|
|
|
// accountCandidate is the working record kept per handle while search results
// are being aggregated, before it is converted into a SimilarAccount.
type accountCandidate struct {
	// username is the handle as it appeared in the search result.
	username string
	// score is the accumulated match strength; higher sorts first.
	score int
	// reason is the human-readable text shown for this candidate.
	reason string
	// source records where the candidate was found.
	source string
}
|
|
|
|
func DiscoverSimilarAccounts(ctx context.Context, client websearch.Client, input DiscoverAccountsInput) ([]SimilarAccount, error) {
|
|
if client == nil || !client.Enabled() {
|
|
return nil, nil
|
|
}
|
|
seed := strings.TrimSpace(input.SeedQuery)
|
|
if seed == "" {
|
|
return nil, nil
|
|
}
|
|
queries := buildAccountDiscoverQueries(seed, input.Brief, input.Pillars)
|
|
if len(queries) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
seen := map[string]accountCandidate{}
|
|
for _, query := range queries {
|
|
res, err := client.Search(ctx, websearch.SearchOptions{
|
|
Query: query,
|
|
Limit: 12,
|
|
Mode: websearch.ModeThreadsDiscover,
|
|
})
|
|
if err != nil || res.Status != "success" {
|
|
continue
|
|
}
|
|
for _, item := range res.Results {
|
|
blob := strings.TrimSpace(item.URL + " " + item.Title + " " + item.Snippet)
|
|
for _, username := range extractUsernames(blob) {
|
|
weight := 2
|
|
if strings.Contains(strings.ToLower(item.URL), "/@"+strings.ToLower(username)) {
|
|
weight = 4
|
|
}
|
|
reason := strings.TrimSpace(item.Snippet)
|
|
if reason == "" {
|
|
reason = strings.TrimSpace(item.Title)
|
|
}
|
|
if reason == "" {
|
|
reason = "在「" + seed + "」相關搜尋結果中找到"
|
|
}
|
|
if len([]rune(reason)) > 120 {
|
|
reason = string([]rune(reason)[:120])
|
|
}
|
|
key := strings.ToLower(username)
|
|
prev, ok := seen[key]
|
|
if !ok || weight > prev.score {
|
|
seen[key] = accountCandidate{
|
|
username: username,
|
|
score: weight,
|
|
reason: reason,
|
|
source: "web",
|
|
}
|
|
} else if ok {
|
|
prev.score += 1
|
|
seen[key] = prev
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
out := make([]accountCandidate, 0, len(seen))
|
|
for _, item := range seen {
|
|
out = append(out, item)
|
|
}
|
|
sort.Slice(out, func(i, j int) bool { return out[i].score > out[j].score })
|
|
if len(out) > MaxSimilarAccounts {
|
|
out = out[:MaxSimilarAccounts]
|
|
}
|
|
|
|
accounts := make([]SimilarAccount, 0, len(out))
|
|
for _, item := range out {
|
|
accounts = append(accounts, SimilarAccount{
|
|
Username: item.username,
|
|
Reason: item.reason,
|
|
Source: item.source,
|
|
Confidence: accountConfidence(item.score),
|
|
ProfileURL: "https://www.threads.net/@" + item.username,
|
|
})
|
|
}
|
|
return accounts, nil
|
|
}
|
|
|
|
func buildAccountDiscoverQueries(seed, brief string, pillars []string) []string {
|
|
quoted := `"` + seed + `"`
|
|
queries := []string{
|
|
`site:threads.net ` + quoted,
|
|
`threads ` + quoted + ` 創作者`,
|
|
}
|
|
if hint := strings.TrimSpace(brief); len([]rune(hint)) >= 4 && len([]rune(hint)) <= 24 {
|
|
queries = append(queries, `site:threads.net `+quoted+` `+hint)
|
|
}
|
|
for _, pillar := range pillars {
|
|
pillar = strings.TrimSpace(pillar)
|
|
if len([]rune(pillar)) >= 4 && len(queries) < maxAccountDiscoverQueries+1 {
|
|
queries = append(queries, `site:threads.net "`+pillar+`"`)
|
|
}
|
|
}
|
|
unique := []string{}
|
|
seen := map[string]struct{}{}
|
|
for _, q := range queries {
|
|
q = strings.TrimSpace(q)
|
|
if q == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[q]; ok {
|
|
continue
|
|
}
|
|
seen[q] = struct{}{}
|
|
unique = append(unique, q)
|
|
if len(unique) >= maxAccountDiscoverQueries {
|
|
break
|
|
}
|
|
}
|
|
return unique
|
|
}
|
|
|
|
func extractUsernames(blob string) []string {
|
|
matches := threadsProfileRE.FindAllStringSubmatch(blob, -1)
|
|
out := []string{}
|
|
seen := map[string]struct{}{}
|
|
for _, match := range matches {
|
|
if len(match) < 2 {
|
|
continue
|
|
}
|
|
user := strings.TrimSpace(match[1])
|
|
if !isValidUsername(user) {
|
|
continue
|
|
}
|
|
key := strings.ToLower(user)
|
|
if _, ok := seen[key]; ok {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
out = append(out, user)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func isValidUsername(username string) bool {
|
|
if username == "" || len(username) < 2 || len(username) > 30 {
|
|
return false
|
|
}
|
|
if _, ok := reservedUsernames[strings.ToLower(username)]; ok {
|
|
return false
|
|
}
|
|
for _, r := range username {
|
|
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '.' || r == '_' {
|
|
continue
|
|
}
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// accountConfidence maps a candidate score to a confidence label:
// "high" for 5 and above, "medium" for 3 or 4, and "low" otherwise.
func accountConfidence(score int) string {
	switch {
	case score >= 5:
		return "high"
	case score >= 3:
		return "medium"
	default:
		return "low"
	}
}
|