haixunMaster/lib/services/discover-accounts.ts

438 lines
12 KiB
TypeScript
Raw Normal View History

2026-06-21 12:50:31 +00:00
import {
filterDiscoverItemsWithAi,
type DiscoverFilterItem,
} from "@/lib/ai/filter-discover-relevance";
import type { ProviderApiKeys } from "@/lib/ai/keys";
import { searchWebThorough } from "@/lib/services/web-search";
import { search } from "@/lib/threads-browser/search";
import {
BROAD_TAG_BLOCKLIST,
buildTopicAnchor,
scoreTopicRelevance,
type TopicAnchor,
} from "@/lib/topic-anchor";
import {
normalizeThreadsPostUrl,
normalizeUsername,
threadsProfileUrl,
2026-06-21 16:28:26 +00:00
type AccountConfidence,
2026-06-21 12:50:31 +00:00
type SimilarAccount,
} from "@/lib/types/research";
/**
 * Matches a Threads profile link and captures the handle that follows `/@`.
 *
 * The scheme (`http:` / `https:`) is optional, so protocol-relative `//…`
 * links match too; `www.` is optional; the host may be `threads.com` or
 * `threads.net`. The captured handle is one or more of `[a-zA-Z0-9._]`.
 *
 * The pattern is global and case-insensitive. A global regex keeps
 * `lastIndex` state between `exec` calls, which is why
 * `extractUsernamesFromText` builds a fresh copy instead of using this
 * instance directly.
 */
const THREADS_PROFILE_RE =
/(?:https?:)?\/\/(?:www\.)?threads\.(?:com|net)\/@([a-zA-Z0-9._]+)/gi;
/**
 * Lower-case handles that are never accepted as real accounts.
 *
 * `isValidUsername` lower-cases a candidate before looking it up here, so
 * every entry must be stored in lower case.
 */
const RESERVED_USERNAMES = new Set([
  "login", "signup", "search", "explore", "home", "help",
  "about", "privacy", "terms", "settings", "accounts", "direct",
  "reels", "stories", "legal", "web", "www", "intent",
  "share", "threads", "thread", "instagram", "meta",
]);
/**
 * A `TopicAnchor` extended with the extra signals that account discovery
 * uses when building search queries. Produced by `buildDiscoverAnchor`.
 */
interface DiscoverAnchor extends TopicAnchor {
/** Pillars from the discover context, trimmed and with empty entries removed. */
pillars: string[];
/** Suggested tags that passed the specificity filter in `buildDiscoverAnchor`. */
specificTags: string[];
}
/**
 * Working record for one account while candidates are collected, merged,
 * AI-scored and ranked. Converted to `SimilarAccount` by `toSimilarAccounts`.
 */
interface AccountCandidate {
/** Normalized handle; maps in this file are keyed by its lower-cased form. */
username: string;
/** Accumulated weight: each additional sighting adds its weight to this total. */
score: number;
/** Topic relevance from `scoreTopicRelevance`; may be raised by a positive AI verdict. */
relevance: number;
/** Score from the AI relevance verdict; absent when no verdict was returned. */
aiScore?: number;
/** Explanation from the AI relevance verdict; absent when no verdict was returned. */
aiReason?: string;
/** Text describing why the account was picked up (search snippet, title or post text). */
reason: string;
/** Source recorded when the candidate was first created; later sightings do not change it. */
source: SimilarAccount["source"];
/** Normalized Threads post URL from a sighting, when one was available. */
postUrl?: string;
/** Hashtags extracted from the text of the web search result that surfaced the account. */
tags?: string[];
}
/**
 * Topic description that drives discovery. The whole object is handed to
 * `buildTopicAnchor`; individual fields are also used as noted below.
 */
interface DiscoverContext {
/** Forwarded to the AI relevance filter. */
label: string;
/** Forwarded to the AI relevance filter. */
query: string;
/** Forwarded to the AI relevance filter; its first 24 characters may be appended to a web query. */
brief?: string | null;
/** Not read directly in this file — presumably consumed by `buildTopicAnchor`; verify there. */
productContext?: string | null;
/** Trimmed into `DiscoverAnchor.pillars` and forwarded to the AI relevance filter. */
pillars?: string[];
/** Raw tag suggestions; filtered down to `DiscoverAnchor.specificTags`. */
suggestedTags?: string[];
/** Forwarded to the AI relevance filter. */
exclusions?: string[];
}
/**
 * Decides whether a raw handle can be treated as a real Threads account.
 *
 * After normalization the handle must be 2–30 characters long, must not be
 * a reserved name (compared case-insensitively), must not look like a
 * placeholder (`creator_<digits>` or `example_…`), and must consist solely
 * of ASCII letters, digits, dots and underscores.
 *
 * @param username Raw handle as found in search results or post metadata.
 * @returns `true` when every check passes.
 */
function isValidUsername(username: string): boolean {
  const handle = normalizeUsername(username);
  if (!handle) return false;

  const lengthOk = handle.length >= 2 && handle.length <= 30;
  if (!lengthOk) return false;

  const reserved = RESERVED_USERNAMES.has(handle.toLowerCase());
  const placeholder = /^(creator_\d+|example_.*)$/i.test(handle);
  const wellFormed = /^[a-zA-Z0-9._]+$/.test(handle);

  return !reserved && !placeholder && wellFormed;
}
/**
 * Builds the anchor used by every discovery step: the base topic anchor plus
 * cleaned pillars and the subset of suggested tags that are specific to the
 * topic.
 *
 * A tag is kept as "specific" when it is at least 4 characters long, is not
 * in `BROAD_TAG_BLOCKLIST`, and satisfies one of:
 *  - it contains, or is contained in, the (non-empty) core phrase;
 *  - when the anchor has two or more required concepts, it contains at
 *    least two of them (otherwise it is rejected);
 *  - when the anchor has fewer than two required concepts, its topic
 *    relevance score is at least 6.
 *
 * @param ctx Topic description supplied by the caller.
 * @returns The base anchor extended with `pillars` and `specificTags`.
 */
function buildDiscoverAnchor(ctx: DiscoverContext): DiscoverAnchor {
  const base = buildTopicAnchor(ctx);
  const pillars = (ctx.pillars ?? []).map((p) => p.trim()).filter(Boolean);
  const corePhrase = base.corePhrase;

  const specificTags = (ctx.suggestedTags ?? [])
    // Trim first so a padded " @tag" still loses its leading "@".
    .map((raw) => raw.trim().replace(/^@/, "").trim())
    .filter((tag) => {
      if (tag.length < 4 || BROAD_TAG_BLOCKLIST.has(tag)) return false;

      // An empty core phrase is a substring of every tag, so it must not
      // count as a match; fall through to the stricter checks instead.
      if (
        corePhrase.length > 0 &&
        (tag.includes(corePhrase) || corePhrase.includes(tag))
      ) {
        return true;
      }

      if (base.requiredConcepts.length >= 2) {
        const matched = base.requiredConcepts.filter((c) => tag.includes(c));
        return matched.length >= 2;
      }

      return scoreTopicRelevance(tag, base) >= 6;
    });

  return { ...base, pillars, specificTags };
}
2026-06-21 16:28:26 +00:00
/**
 * Maps a ranked candidate onto a coarse confidence label.
 *
 * - `"high"`: score above 20 and the source is `"threads"` or `"scan"`.
 * - `"medium"`: score above 10, or an AI score above 0.5.
 * - `"low"`: everything else.
 *
 * @param candidate Candidate whose `score`, `source` and `aiScore` are read.
 * @returns The confidence label for the candidate.
 */
function assignConfidence(candidate: AccountCandidate): AccountConfidence {
  const { score, source } = candidate;
  const aiScore = candidate.aiScore ?? 0;
  const eligibleForHigh = source === "threads" || source === "scan";

  if (eligibleForHigh && score > 20) return "high";
  return score > 10 || aiScore > 0.5 ? "medium" : "low";
}
2026-06-21 12:50:31 +00:00
/**
 * Collects up to six distinct hashtags from a piece of text.
 *
 * A hashtag is `#` followed by 2–24 word characters or CJK ideographs in the
 * U+4E00–U+9FFF range; a longer run is cut off after 24 characters. The
 * leading `#` is removed and first-seen order is preserved.
 *
 * @param text Text to scan.
 * @returns At most six unique tags without the `#` prefix.
 */
function extractTagsFromText(text: string): string[] {
  const hashtags = text.match(/#[\w\u4e00-\u9fff]{2,24}/g) ?? [];
  // Every match starts with "#", so dropping the first character strips it.
  const unique = new Set(hashtags.map((hashtag) => hashtag.slice(1)));
  return [...unique].slice(0, 6);
}
/**
 * Returns every handle captured from Threads profile links in `text`, in
 * order of appearance. Repeated links yield repeated handles.
 *
 * @param text Text to scan for profile URLs.
 * @returns The captured handles; empty when no profile link is present.
 */
function extractUsernamesFromText(text: string): string[] {
  // Work on a private copy so the shared global regex's lastIndex is never touched.
  const scanner = new RegExp(THREADS_PROFILE_RE.source, THREADS_PROFILE_RE.flags);
  return Array.from(text.matchAll(scanner), (hit) => hit[1]);
}
/**
 * Records one sighting of an account in `map`, keyed by lower-cased handle.
 *
 * Invalid handles are ignored. A first sighting creates a new candidate
 * whose score is the sighting weight. A repeat sighting adds the weight to
 * the existing score, replaces `reason`/`relevance` only when the new
 * sighting is more relevant, and overwrites `postUrl` whenever the sighting
 * carries a usable one. `source` and `tags` of an existing candidate are
 * left as they were.
 *
 * @param map Accumulator that is mutated in place.
 * @param username Raw handle of the sighted account.
 * @param params Sighting details; `weight` defaults to 1.
 */
function addCandidate(
  map: Map<string, AccountCandidate>,
  username: string,
  params: {
    reason: string;
    source: SimilarAccount["source"];
    weight?: number;
    postUrl?: string;
    tags?: string[];
    anchor: TopicAnchor;
  }
) {
  const handle = normalizeUsername(username);
  if (!isValidUsername(handle)) return;

  const sightingRelevance = scoreTopicRelevance(params.reason, params.anchor);
  const sightingWeight = params.weight ?? 1;
  const sightingPostUrl = params.postUrl
    ? normalizeThreadsPostUrl(params.postUrl) ?? undefined
    : undefined;

  const mapKey = handle.toLowerCase();
  const known = map.get(mapKey);

  if (!known) {
    map.set(mapKey, {
      username: handle,
      score: sightingWeight,
      relevance: sightingRelevance,
      reason: params.reason,
      source: params.source,
      postUrl: sightingPostUrl,
      tags: params.tags,
    });
    return;
  }

  known.score += sightingWeight;
  if (sightingRelevance > known.relevance) {
    known.relevance = sightingRelevance;
    known.reason = params.reason;
  }
  if (sightingPostUrl) known.postUrl = sightingPostUrl;
}
/**
 * Builds the de-duplicated list of web search queries for a topic.
 *
 * The fixed queries combine the quoted core phrase with `site:threads.net`
 * and a few Chinese qualifiers. Two kinds of optional queries are appended:
 *  - one using the first 24 characters of the trimmed brief, when that hint
 *    is at least 4 characters long and scores 6 or more against the anchor;
 *  - two per pillar, for the first two pillars only, when the pillar is at
 *    least 4 characters long and scores 6 or more against the anchor.
 *
 * @param anchor Anchor providing the core phrase and pillars.
 * @param brief Optional free-text brief used as an extra query hint.
 * @returns Unique query strings in insertion order.
 */
function buildWebSearchQueries(anchor: DiscoverAnchor, brief?: string | null): string[] {
  const quoted = `"${anchor.corePhrase}"`;
  const queries = [
    `site:threads.net ${quoted}`,
    `threads ${quoted} 帳號`,
    `threads ${quoted} 創作者`,
    `site:threads.net ${quoted} 推薦`,
    `site:threads.net ${quoted} 心得`,
    `${quoted} 創作者 threads`,
    `"${anchor.corePhrase}" site:threads.net`,
  ];

  const briefHint = brief?.trim().slice(0, 24) ?? "";
  if (briefHint.length >= 4 && scoreTopicRelevance(briefHint, anchor) >= 6) {
    queries.push(`threads "${anchor.corePhrase}" ${briefHint}`);
  }

  for (const pillar of anchor.pillars.slice(0, 2)) {
    if (pillar.length >= 4 && scoreTopicRelevance(pillar, anchor) >= 6) {
      queries.push(`site:threads.net "${pillar}"`);
      queries.push(`threads "${pillar}" 推薦`);
    }
  }

  // A Set keeps first-seen order while dropping exact duplicates.
  return [...new Set(queries)];
}
/**
 * Chooses at most three search terms for the in-app Threads search.
 *
 * The core phrase comes first when it is at least 4 characters long,
 * followed by specific tags that are 4–14 characters long and score 6 or
 * more against the anchor. Duplicates are removed before truncating.
 *
 * @param anchor Anchor providing the core phrase and specific tags.
 * @returns Up to three unique search terms.
 */
function buildThreadsSearchQueries(anchor: DiscoverAnchor): string[] {
  const tagTerms = anchor.specificTags.filter(
    (tag) =>
      tag.length >= 4 &&
      tag.length <= 14 &&
      scoreTopicRelevance(tag, anchor) >= 6
  );
  const terms =
    anchor.corePhrase.length >= 4 ? [anchor.corePhrase, ...tagTerms] : tagTerms;

  return Array.from(new Set(terms)).slice(0, 3);
}
/**
 * Finds candidate accounts by running every web search query in parallel
 * and extracting Threads profile links from the results.
 *
 * A query that fails is treated as returning no results, so one failing
 * search does not abort discovery. Each extracted handle is recorded with a
 * weight of 3 when the result link itself points at that handle (2
 * otherwise), plus a quarter of the result's topic relevance, plus 0.5 when
 * the result came from the `"brave"` provider.
 *
 * @param anchor Anchor used to build queries and score relevance.
 * @param brief Optional brief forwarded to the query builder.
 * @returns All candidates found, in first-seen order.
 */
async function discoverFromWebSearch(
  anchor: DiscoverAnchor,
  brief?: string | null
): Promise<AccountCandidate[]> {
  const map = new Map<string, AccountCandidate>();
  const queries = buildWebSearchQueries(anchor, brief);
  const perQueryLimit = 15;

  const results = await Promise.all(
    queries.map((q) =>
      searchWebThorough(q, perQueryLimit, {
        patrolMode: true,
        priority: "high",
        threadsOnly: true,
      }).catch(() => ({ results: [] }))
    )
  );

  for (const batch of results) {
    for (const item of batch.results) {
      const blob = `${item.link} ${item.title} ${item.snippet}`;
      const relevance = scoreTopicRelevance(blob, anchor);
      const tags = extractTagsFromText(blob);

      const usernames = extractUsernamesFromText(blob);
      if (usernames.length === 0) continue;

      // These depend only on the result item, so compute them once per item
      // rather than once per extracted handle.
      const reason = (
        item.snippet.trim() ||
        item.title.trim() ||
        `在「${anchor.corePhrase}」相關網路搜尋結果中找到`
      ).slice(0, 160);
      const postUrl = normalizeThreadsPostUrl(item.link) ?? undefined;
      const providerBonus = item.provider === "brave" ? 0.5 : 0;

      for (const username of usernames) {
        addCandidate(map, username, {
          reason,
          source: "web",
          weight:
            (item.link.includes(`/@${username}`) ? 3 : 2) +
            relevance / 4 +
            providerBonus,
          postUrl,
          tags,
          anchor,
        });
      }
    }
  }

  return Array.from(map.values());
}
/**
 * Finds candidate accounts through the in-app Threads search, one query
 * after another.
 *
 * Every post with an author name counts as a sighting weighted by
 * `2 + relevance / 2 + min(likeCount / 100, 2)`. If anything throws while
 * running or processing a query, the remainder of that query is skipped and
 * the next one is tried.
 *
 * @param storageState Value passed through to `search` as its first argument.
 * @param anchor Anchor used to build queries and score relevance.
 * @param limit Maximum number of candidates to return (default 5).
 * @returns The top candidates ordered by `score + relevance`, descending.
 */
async function discoverFromThreadsSearch(
  storageState: string,
  anchor: DiscoverAnchor,
  limit = 5
): Promise<AccountCandidate[]> {
  const found = new Map<string, AccountCandidate>();

  for (const term of buildThreadsSearchQueries(anchor)) {
    try {
      const posts = await search(storageState, term, 15);
      for (const post of posts) {
        if (!post.authorName) continue;

        const body = post.text.trim();
        const topicScore = scoreTopicRelevance(body, anchor);
        const likeBonus = Math.min((post.likeCount ?? 0) / 100, 2);
        const fallbackReason = `在 Threads 搜尋「${term}」的熱門貼文中出現`;

        addCandidate(found, post.authorName, {
          reason: body.slice(0, 100) || fallbackReason,
          source: "threads",
          weight: 2 + topicScore / 2 + likeBonus,
          postUrl: post.permalink,
          anchor,
        });
      }
    } catch {
      // Best effort: give up on this query and move on to the next one.
      continue;
    }
  }

  const rank = (c: AccountCandidate) => c.score + c.relevance;
  const ordered = Array.from(found.values());
  ordered.sort((a, b) => rank(b) - rank(a));
  return ordered.slice(0, limit);
}
/**
 * Orders candidates from best to worst by the composite value
 * `score + relevance * 2 + aiScore * 3` (a missing `aiScore` counts as 0).
 *
 * The input array is left untouched: `Array.prototype.sort` sorts in place,
 * so a shallow copy is sorted and returned instead. The candidate objects
 * themselves are shared between the input and the result.
 *
 * @param candidates Candidates to rank.
 * @returns A new array with the same candidates in descending rank order.
 */
function rankCandidates(candidates: AccountCandidate[]): AccountCandidate[] {
  const composite = (c: AccountCandidate): number =>
    c.score + c.relevance * 2 + (c.aiScore ?? 0) * 3;

  return [...candidates].sort((a, b) => composite(b) - composite(a));
}
/**
 * Annotates candidates with verdicts from the AI relevance filter.
 *
 * Only the first 18 candidates are sent to the filter. Each candidate that
 * receives a verdict gets `aiScore` and `aiReason`; when the verdict marks
 * it relevant, its `relevance` is also increased by `round(score * 4)`.
 * Candidates without a verdict are returned unchanged. No candidate is ever
 * removed by this step.
 *
 * @param candidates Candidates in the order they should be offered to the filter.
 * @param ctx Topic description forwarded to the filter.
 * @param anchor Anchor whose required concepts are forwarded to the filter.
 * @param ai Provider settings; when omitted the candidates are returned as-is.
 * @returns Candidates in their original order, with AI annotations where available.
 */
async function applyAiRelevanceFilter(
  candidates: AccountCandidate[],
  ctx: DiscoverContext,
  anchor: DiscoverAnchor,
  ai?: {
    aiProvider: string;
    aiModel: string;
    apiKeys?: ProviderApiKeys;
  }
): Promise<AccountCandidate[]> {
  if (!ai || candidates.length === 0) return candidates;

  // Upper bound on how many candidates are submitted in the single filter call.
  const maxAiItems = 18;

  const items: DiscoverFilterItem[] = candidates.slice(0, maxAiItems).map((c) => ({
    id: c.username.toLowerCase(),
    text: c.reason,
    username: c.username,
    source: c.source ?? "web",
    tags: c.tags,
  }));

  const verdicts = await filterDiscoverItemsWithAi({
    label: ctx.label,
    query: ctx.query,
    brief: ctx.brief,
    exclusions: ctx.exclusions,
    pillars: ctx.pillars,
    requiredConcepts: anchor.requiredConcepts,
    items,
    aiProvider: ai.aiProvider,
    aiModel: ai.aiModel,
    apiKeys: ai.apiKeys,
  });

  return candidates.map((c) => {
    // Verdicts are looked up by the same lower-cased handle used as the item id.
    const verdict = verdicts.get(c.username.toLowerCase());
    if (!verdict) return c;
    return {
      ...c,
      aiScore: verdict.score,
      aiReason: verdict.reason,
      relevance: verdict.relevant
        ? c.relevance + Math.round(verdict.score * 4)
        : c.relevance,
    };
  });
}
/**
 * Consistency-check hook in the discovery pipeline.
 *
 * Currently a pass-through: it performs no checks and returns the very same
 * array it was given.
 *
 * @param candidates Candidates to verify.
 * @returns The `candidates` array, unchanged.
 */
function verifyAccountConsistency(
  candidates: AccountCandidate[]
): AccountCandidate[] {
  return candidates;
}
/**
 * Converts the first `limit` candidates into the public `SimilarAccount`
 * shape, attaching a profile URL and a confidence label.
 *
 * @param candidates Candidates, already in the order they should be returned.
 * @param limit Maximum number of accounts to emit.
 * @returns One `SimilarAccount` per retained candidate, in the same order.
 */
function toSimilarAccounts(candidates: AccountCandidate[], limit: number): SimilarAccount[] {
  return candidates.slice(0, limit).map((c) => ({
    username: c.username,
    // NOTE(review): the two reasons are joined by the bare literal "AI" with no
    // separator or punctuation — confirm this is the intended display format.
    reason: c.aiReason ? `${c.reason}AI${c.aiReason}` : c.reason,
    source: c.source,
    profileUrl: threadsProfileUrl(c.username) ?? undefined,
    postUrl: c.postUrl,
    confidence: assignConfidence(c),
  }));
}
/**
 * Discovers Threads accounts related to a topic.
 *
 * Pipeline:
 *  1. Build the discovery anchor from the topic description.
 *  2. When `storageState` is provided, collect candidates from the in-app
 *     Threads search.
 *  3. Collect candidates from web search and merge them in: for an account
 *     already known, scores are summed, the higher relevance wins, the web
 *     post URL replaces the existing one when present, the longer reason is
 *     kept, and web-derived hashtags are adopted if the account had none.
 *  4. Rank; when both `aiProvider` and `aiModel` are given, annotate with AI
 *     verdicts and rank again.
 *  5. Run the consistency hook, rank once more, and return the top `limit`.
 *
 * @param params Topic description plus optional Threads session state,
 *   result limit (default 8) and AI provider settings.
 * @returns Up to `limit` similar accounts, best first.
 */
export async function discoverSimilarAccounts(params: {
  label: string;
  query: string;
  brief?: string | null;
  productContext?: string | null;
  pillars?: string[];
  suggestedTags?: string[];
  exclusions?: string[];
  storageState?: string | null;
  limit?: number;
  aiProvider?: string;
  aiModel?: string;
  apiKeys?: ProviderApiKeys;
}): Promise<SimilarAccount[]> {
  const limit = params.limit ?? 8;
  const anchor = buildDiscoverAnchor(params);
  const merged = new Map<string, AccountCandidate>();

  if (params.storageState) {
    const threadsCandidates = await discoverFromThreadsSearch(
      params.storageState,
      anchor,
      limit
    );
    for (const c of threadsCandidates) {
      merged.set(c.username.toLowerCase(), c);
    }
  }

  const webCandidates = await discoverFromWebSearch(anchor, params.brief);
  for (const c of webCandidates) {
    const key = c.username.toLowerCase();
    const existing = merged.get(key);
    if (existing) {
      existing.score += c.score;
      existing.relevance = Math.max(existing.relevance, c.relevance);
      if (c.postUrl) existing.postUrl = c.postUrl;
      if (c.reason.length > existing.reason.length) existing.reason = c.reason;
      // Threads-search candidates carry no hashtags; keep the ones the web
      // result supplied so the AI filter still receives them.
      if (!existing.tags?.length && c.tags?.length) existing.tags = c.tags;
    } else {
      merged.set(key, c);
    }
  }

  let sorted = rankCandidates(Array.from(merged.values()));

  if (params.aiProvider && params.aiModel) {
    sorted = await applyAiRelevanceFilter(sorted, params, anchor, {
      aiProvider: params.aiProvider,
      aiModel: params.aiModel,
      apiKeys: params.apiKeys,
    });
    sorted = rankCandidates(sorted);
  }

  sorted = verifyAccountConsistency(sorted);
  sorted = rankCandidates(sorted);

  return toSimilarAccounts(sorted, limit);
}