import {
  filterDiscoverItemsWithAi,
  type DiscoverFilterItem,
} from "@/lib/ai/filter-discover-relevance";
import type { ProviderApiKeys } from "@/lib/ai/keys";
import { searchWebThorough } from "@/lib/services/web-search";
import { search } from "@/lib/threads-browser/search";
import {
  BROAD_TAG_BLOCKLIST,
  buildTopicAnchor,
  scoreTopicRelevance,
  type TopicAnchor,
} from "@/lib/topic-anchor";
import {
  normalizeThreadsPostUrl,
  normalizeUsername,
  threadsProfileUrl,
  type SimilarAccount,
} from "@/lib/types/research";

/**
 * Matches Threads profile links on threads.com / threads.net (scheme and
 * "www." optional) and captures the handle that follows "@".
 *
 * The pattern is global; callers should work on a fresh copy so that
 * `lastIndex` state is never shared between scans.
 */
const THREADS_PROFILE_RE =
  /(?:https?:)?\/\/(?:www\.)?threads\.(?:com|net)\/@([a-zA-Z0-9._]+)/gi;

/** Lower-cased path segments / brand words that must never be treated as account handles. */
const RESERVED_USERNAMES = new Set([
  "login",
  "signup",
  "search",
  "explore",
  "home",
  "help",
  "about",
  "privacy",
  "terms",
  "settings",
  "accounts",
  "direct",
  "reels",
  "stories",
  "legal",
  "web",
  "www",
  "intent",
  "share",
  "threads",
  "thread",
  "instagram",
  "meta",
]);

/** Topic anchor extended with the caller-supplied pillars and the tags judged specific to the topic. */
interface DiscoverAnchor extends TopicAnchor {
  pillars: string[];
  specificTags: string[];
}

/** Working record for one discovered account while scores from several sources are accumulated. */
interface AccountCandidate {
  username: string;
  /** Sum of the weights of every sighting of this account. */
  score: number;
  /** Highest topic-relevance score seen for this account (may be overwritten by the AI filter). */
  relevance: number;
  aiScore?: number;
  aiReason?: string;
  /** Text snippet explaining why the account was picked up. */
  reason: string;
  source: SimilarAccount["source"];
  postUrl?: string;
  tags?: string[];
}

/** Topic description used both to build the anchor and to brief the AI relevance filter. */
interface DiscoverContext {
  label: string;
  query: string;
  brief?: string | null;
  productContext?: string | null;
  pillars?: string[];
  suggestedTags?: string[];
  exclusions?: string[];
}

/**
 * Checks whether a raw handle looks like a real account name.
 *
 * Rejects handles that, after `normalizeUsername`, are empty, shorter than 2
 * or longer than 30 characters, are in `RESERVED_USERNAMES`, look like
 * placeholder names (`creator_123`, `example_*`), or contain characters other
 * than letters, digits, "." and "_".
 */
function isValidUsername(username: string): boolean {
  const clean = normalizeUsername(username);
  if (!clean || clean.length < 2 || clean.length > 30) return false;
  if (RESERVED_USERNAMES.has(clean.toLowerCase())) return false;
  // Placeholder-style handles are rejected outright.
  if (/^(creator_\d+|example_.*)$/i.test(clean)) return false;
  return /^[a-zA-Z0-9._]+$/.test(clean);
}

/**
 * Builds the topic anchor for a discovery run and attaches the cleaned
 * pillars plus the subset of suggested tags that are specific to the topic.
 *
 * A tag is kept as "specific" when it is at least 4 characters long, is not
 * in `BROAD_TAG_BLOCKLIST`, and either overlaps the core phrase, contains at
 * least two required concepts (when the anchor has two or more), or scores
 * at least 6 with `scoreTopicRelevance`.
 */
function buildDiscoverAnchor(ctx: DiscoverContext): DiscoverAnchor {
  const base = buildTopicAnchor(ctx);
  const pillars = (ctx.pillars ?? []).map((p) => p.trim()).filter(Boolean);

  // Trim first so that a leading "@" is still removed when the raw tag has
  // surrounding whitespace.
  const tagCandidates = (ctx.suggestedTags ?? [])
    .map((t) => t.trim().replace(/^@/, "").trim())
    .filter((t) => t.length >= 3);

  const core = base.corePhrase;
  const specificTags = tagCandidates.filter((tag) => {
    if (tag.length < 4 || BROAD_TAG_BLOCKLIST.has(tag)) return false;
    // Guard against an empty core phrase: `tag.includes("")` is always true
    // and would otherwise let every tag through.
    if (core && (tag.includes(core) || core.includes(tag))) return true;
    if (base.requiredConcepts.length >= 2) {
      const matched = base.requiredConcepts.filter((c) => tag.includes(c));
      return matched.length >= 2;
    }
    return scoreTopicRelevance(tag, base) >= 6;
  });

  return { ...base, pillars, specificTags };
}

/**
 * Extracts up to 6 distinct hashtags from free text, without the leading "#".
 * A hashtag body is 2–24 word characters or characters in U+4E00–U+9FFF.
 */
function extractTagsFromText(text: string): string[] {
  const found = new Set<string>();
  for (const match of text.match(/#[\w\u4e00-\u9fff]{2,24}/g) ?? []) {
    found.add(match.replace(/^#/, ""));
  }
  return Array.from(found).slice(0, 6);
}

/**
 * Extracts every handle that appears inside a Threads profile link in `text`.
 * Duplicates are preserved, in order of appearance.
 */
function extractUsernamesFromText(text: string): string[] {
  const found: string[] = [];
  // Fresh copy of the global regex so `lastIndex` always starts at 0.
  const re = new RegExp(THREADS_PROFILE_RE.source, THREADS_PROFILE_RE.flags);
  let match: RegExpExecArray | null;
  while ((match = re.exec(text)) !== null) {
    // The handle character class includes ".", so a link at the end of a
    // sentence ("…/@name.") would capture the full stop; drop trailing dots.
    const handle = (match[1] ?? "").replace(/\.+$/, "");
    if (handle) found.push(handle);
  }
  return found;
}

/**
 * Records one sighting of an account in `map` (keyed by lower-cased handle).
 *
 * The sighting is ignored when the handle is invalid or when `params.reason`
 * scores below 3 against `params.anchor`. For an account already in the map
 * the weight is added to its score, the reason is replaced only when this
 * sighting is more relevant, and a normalizable post URL overwrites the
 * stored one.
 *
 * @param map - Accumulator mutated in place.
 * @param username - Raw handle; normalized before use.
 * @param params - Sighting details; `weight` defaults to 1.
 */
function addCandidate(
  map: Map<string, AccountCandidate>,
  username: string,
  params: {
    reason: string;
    source: SimilarAccount["source"];
    weight?: number;
    postUrl?: string;
    tags?: string[];
    anchor: TopicAnchor;
  }
) {
  const clean = normalizeUsername(username);
  if (!isValidUsername(clean)) return;

  const relevance = scoreTopicRelevance(params.reason, params.anchor);
  if (relevance < 3) return;

  const key = clean.toLowerCase();
  const existing = map.get(key);
  const weight = params.weight ?? 1;
  const postUrl = params.postUrl
    ? normalizeThreadsPostUrl(params.postUrl) ?? undefined
    : undefined;

  if (existing) {
    existing.score += weight;
    if (relevance > existing.relevance) {
      existing.relevance = relevance;
      existing.reason = params.reason;
    }
    if (postUrl) existing.postUrl = postUrl;
    return;
  }

  map.set(key, {
    username: clean,
    score: weight,
    relevance,
    reason: params.reason,
    source: params.source,
    postUrl,
    tags: params.tags,
  });
}

/**
 * Builds the de-duplicated list of web-search queries for a topic.
 *
 * Always includes four queries around the quoted core phrase; adds one with
 * the first 24 characters of the brief, and one per pillar (first two only),
 * when that text is at least 4 characters long and scores at least 6 against
 * the anchor.
 */
function buildWebSearchQueries(anchor: DiscoverAnchor, brief?: string | null): string[] {
  const quoted = `"${anchor.corePhrase}"`;
  const queries = [
    `site:threads.com ${quoted}`,
    `site:threads.net ${quoted}`,
    `threads ${quoted} 帳號`,
    `threads ${quoted} 創作者`,
  ];

  const briefHint = brief?.trim().slice(0, 24) ?? "";
  if (briefHint.length >= 4 && scoreTopicRelevance(briefHint, anchor) >= 6) {
    queries.push(`threads ${quoted} ${briefHint}`);
  }

  for (const pillar of anchor.pillars.slice(0, 2)) {
    if (pillar.length >= 4 && scoreTopicRelevance(pillar, anchor) >= 6) {
      queries.push(`site:threads.com "${pillar}"`);
    }
  }

  return [...new Set(queries)];
}

/**
 * Builds at most 3 distinct queries for the Threads in-site search: the core
 * phrase (when at least 4 characters long) followed by specific tags that are
 * 4–14 characters long and score at least 6 against the anchor.
 */
function buildThreadsSearchQueries(anchor: DiscoverAnchor): string[] {
  const queries: string[] = [];
  if (anchor.corePhrase.length >= 4) {
    queries.push(anchor.corePhrase);
  }
  for (const tag of anchor.specificTags) {
    if (tag.length >= 4 && tag.length <= 14 && scoreTopicRelevance(tag, anchor) >= 6) {
      queries.push(tag);
    }
  }
  return [...new Set(queries)].slice(0, 3);
}

/**
 * Discovers candidate accounts from web-search results.
 *
 * All queries run in parallel; a failing query contributes no results instead
 * of failing the whole run. Every Threads profile link found in a result's
 * link, title or snippet becomes a sighting, provided the combined text
 * scores at least 3 against the anchor.
 */
async function discoverFromWebSearch(
  anchor: DiscoverAnchor,
  brief?: string | null
): Promise<AccountCandidate[]> {
  const map = new Map<string, AccountCandidate>();
  const queries = buildWebSearchQueries(anchor, brief);
  const perQueryLimit = 8;

  const results = await Promise.all(
    queries.map((q) =>
      searchWebThorough(q, perQueryLimit, {
        patrolMode: true,
        priority: "high",
        threadsOnly: true,
      }).catch(() => ({ results: [] }))
    )
  );

  for (const batch of results) {
    for (const item of batch.results) {
      const blob = `${item.link} ${item.title} ${item.snippet}`;
      const relevance = scoreTopicRelevance(blob, anchor);
      if (relevance < 3) continue;

      // These depend only on the search hit, not on the individual handle,
      // so compute them once per item.
      const tags = extractTagsFromText(blob);
      const reason = (
        item.snippet.trim() ||
        item.title.trim() ||
        `在「${anchor.corePhrase}」相關網路搜尋結果中找到`
      ).slice(0, 160);
      const postUrl = normalizeThreadsPostUrl(item.link) ?? undefined;

      for (const username of extractUsernamesFromText(blob)) {
        addCandidate(map, username, {
          reason,
          source: "web",
          // Base weight is higher when the result link itself contains the
          // handle; relevance and a "brave" provider add small bonuses.
          weight:
            (item.link.includes(`/@${username}`) ? 3 : 2) +
            relevance / 4 +
            (item.provider === "brave" ? 0.5 : 0),
          postUrl,
          tags,
          anchor,
        });
      }
    }
  }

  return Array.from(map.values());
}

/**
 * Discovers candidate accounts through the Threads in-site search.
 *
 * Queries run one after another; a query that throws is skipped. Authors of
 * posts whose text scores at least 3 against the anchor become sightings.
 *
 * @param storageState - Passed through to `search` unchanged.
 * @param anchor - Topic anchor used for query building and relevance scoring.
 * @param limit - Maximum number of candidates returned (default 5).
 * @returns Candidates sorted by `score + relevance`, highest first.
 */
async function discoverFromThreadsSearch(
  storageState: string,
  anchor: DiscoverAnchor,
  limit = 5
): Promise<AccountCandidate[]> {
  const map = new Map<string, AccountCandidate>();
  const queries = buildThreadsSearchQueries(anchor);

  for (const q of queries) {
    try {
      const posts = await search(storageState, q, 15);
      for (const post of posts) {
        if (!post.authorName) continue;
        const postText = post.text.trim();
        const relevance = scoreTopicRelevance(postText, anchor);
        if (relevance < 3) continue;

        addCandidate(map, post.authorName, {
          reason: postText.slice(0, 100) || `在 Threads 搜尋「${q}」的熱門貼文中出現`,
          source: "threads",
          // Like count contributes at most 2 extra points.
          weight: 2 + relevance / 2 + Math.min((post.likeCount ?? 0) / 100, 2),
          postUrl: post.permalink,
          anchor,
        });
      }
    } catch {
      // Best effort: one failing query must not abort the remaining ones.
      continue;
    }
  }

  return Array.from(map.values())
    .sort((a, b) => b.score + b.relevance - (a.score + a.relevance))
    .slice(0, limit);
}

/**
 * Returns a new array holding the candidates with relevance of at least 3,
 * ordered by `score + relevance * 2 + aiScore * 3` (missing `aiScore` counts
 * as 0), highest first. The input array is not modified.
 */
function rankCandidates(candidates: AccountCandidate[]): AccountCandidate[] {
  const rankValue = (c: AccountCandidate): number =>
    c.score + c.relevance * 2 + (c.aiScore ?? 0) * 3;

  return candidates
    .filter((c) => c.relevance >= 3)
    .sort((a, b) => rankValue(b) - rankValue(a));
}

/**
 * Runs the AI relevance filter over the candidates.
 *
 * Only the first 18 candidates are sent to the filter; candidates without a
 * verdict are passed through unchanged. A candidate judged relevant gains
 * `round(score * 4)` relevance; one judged irrelevant gets relevance -100 and
 * is therefore removed by the final `relevance >= 3` filter.
 *
 * @returns The input unchanged when `ai` is missing or there are no
 *   candidates; otherwise a new, filtered array of candidates.
 */
async function applyAiRelevanceFilter(
  candidates: AccountCandidate[],
  ctx: DiscoverContext,
  anchor: DiscoverAnchor,
  ai?: {
    aiProvider: string;
    aiModel: string;
    apiKeys?: ProviderApiKeys;
  }
): Promise<AccountCandidate[]> {
  if (!ai || candidates.length === 0) return candidates;

  const items: DiscoverFilterItem[] = candidates.slice(0, 18).map((c) => ({
    id: c.username.toLowerCase(),
    text: c.reason,
    username: c.username,
    source: c.source ?? "web",
    tags: c.tags,
  }));

  const verdicts = await filterDiscoverItemsWithAi({
    label: ctx.label,
    query: ctx.query,
    brief: ctx.brief,
    exclusions: ctx.exclusions,
    pillars: ctx.pillars,
    requiredConcepts: anchor.requiredConcepts,
    items,
    aiProvider: ai.aiProvider,
    aiModel: ai.aiModel,
    apiKeys: ai.apiKeys,
  });

  return candidates
    .map((c) => {
      const verdict = verdicts.get(c.username.toLowerCase());
      if (!verdict) return c;
      return {
        ...c,
        aiScore: verdict.score,
        aiReason: verdict.reason,
        relevance: verdict.relevant ? c.relevance + Math.round(verdict.score * 4) : -100,
      };
    })
    .filter((c) => c.relevance >= 3);
}

/**
 * Converts the first `limit` candidates into the public `SimilarAccount`
 * shape. When an AI reason is present it is appended to the textual reason.
 */
function toSimilarAccounts(candidates: AccountCandidate[], limit: number): SimilarAccount[] {
  return candidates.slice(0, limit).map((c) => ({
    username: c.username,
    reason: c.aiReason ? `${c.reason}(AI:${c.aiReason})` : c.reason,
    source: c.source,
    profileUrl: threadsProfileUrl(c.username) ?? undefined,
    postUrl: c.postUrl,
  }));
}

/**
 * Discovers accounts similar to a topic.
 *
 * Prefers the browser crawler (Threads in-site search) and supplements with
 * web search when it yields too few results. Usernames only ever come from
 * search results; the AI step filters and scores candidates and is never
 * asked to produce usernames.
 *
 * @param params - Topic description plus optional `storageState` (enables the
 *   Threads search), `limit` (default 8) and AI settings. The AI filter runs
 *   only when both `aiProvider` and `aiModel` are set.
 * @returns At most `limit` accounts, best ranked first.
 */
export async function discoverSimilarAccounts(params: {
  label: string;
  query: string;
  brief?: string | null;
  productContext?: string | null;
  pillars?: string[];
  suggestedTags?: string[];
  exclusions?: string[];
  storageState?: string | null;
  limit?: number;
  aiProvider?: string;
  aiModel?: string;
  apiKeys?: ProviderApiKeys;
}): Promise<SimilarAccount[]> {
  const limit = params.limit ?? 8;
  const anchor = buildDiscoverAnchor(params);
  const merged = new Map<string, AccountCandidate>();

  // 1. Browser crawler (Threads in-site search)
  if (params.storageState) {
    const threadsCandidates = await discoverFromThreadsSearch(
      params.storageState,
      anchor,
      limit
    );
    for (const c of threadsCandidates) {
      merged.set(c.username.toLowerCase(), c);
    }
  }

  // 2. Web search supplement (only when fewer than 3 ranked candidates so far)
  const rankedSoFar = rankCandidates(Array.from(merged.values()));
  if (rankedSoFar.length < 3) {
    const webCandidates = await discoverFromWebSearch(anchor, params.brief);
    for (const c of webCandidates) {
      const key = c.username.toLowerCase();
      const existing = merged.get(key);
      if (existing) {
        // Same account seen by both sources: combine scores, keep the best
        // relevance, the web post URL when present, and the longer reason.
        existing.score += c.score;
        existing.relevance = Math.max(existing.relevance, c.relevance);
        if (c.postUrl) existing.postUrl = c.postUrl;
        if (c.reason.length > existing.reason.length) existing.reason = c.reason;
      } else {
        merged.set(key, c);
      }
    }
  }

  let sorted = rankCandidates(Array.from(merged.values()));

  if (params.aiProvider && params.aiModel) {
    sorted = await applyAiRelevanceFilter(sorted, params, anchor, {
      aiProvider: params.aiProvider,
      aiModel: params.aiModel,
      apiKeys: params.apiKeys,
    });
    // Re-rank because the AI filter changes relevance and adds aiScore.
    sorted = rankCandidates(sorted);
  }

  return toSimilarAccounts(sorted, limit);
}