438 lines
12 KiB
TypeScript
438 lines
12 KiB
TypeScript
import {
|
||
filterDiscoverItemsWithAi,
|
||
type DiscoverFilterItem,
|
||
} from "@/lib/ai/filter-discover-relevance";
|
||
import type { ProviderApiKeys } from "@/lib/ai/keys";
|
||
import { searchWebThorough } from "@/lib/services/web-search";
|
||
import { search } from "@/lib/threads-browser/search";
|
||
import {
|
||
BROAD_TAG_BLOCKLIST,
|
||
buildTopicAnchor,
|
||
scoreTopicRelevance,
|
||
type TopicAnchor,
|
||
} from "@/lib/topic-anchor";
|
||
import {
|
||
normalizeThreadsPostUrl,
|
||
normalizeUsername,
|
||
threadsProfileUrl,
|
||
type AccountConfidence,
|
||
type SimilarAccount,
|
||
} from "@/lib/types/research";
|
||
|
||
/**
 * Matches Threads profile URLs on `threads.com` or `threads.net`, with an
 * optional scheme (`http:`/`https:`) and optional `www.` prefix.
 * Capture group 1 is the handle that follows `/@` (letters, digits, `.`, `_`).
 *
 * The regex carries the `g` flag, so it keeps `lastIndex` state between
 * `exec` calls; clone it before running an exec loop.
 */
const THREADS_PROFILE_RE =
  /(?:https?:)?\/\/(?:www\.)?threads\.(?:com|net)\/@([a-zA-Z0-9._]+)/gi;
|
||
|
||
/**
 * Lower-case handles that are never accepted as real accounts.
 * `isValidUsername` rejects any candidate whose lower-cased form is in this set.
 * NOTE(review): entries look like site route names and brand words — confirm
 * against the actual Threads URL scheme before adding or removing entries.
 */
const RESERVED_USERNAMES = new Set([
  "login",
  "signup",
  "search",
  "explore",
  "home",
  "help",
  "about",
  "privacy",
  "terms",
  "settings",
  "accounts",
  "direct",
  "reels",
  "stories",
  "legal",
  "web",
  "www",
  "intent",
  "share",
  "threads",
  "thread",
  "instagram",
  "meta",
]);
|
||
|
||
/**
 * Topic anchor extended with the discovery-specific inputs used to build
 * search queries.
 */
interface DiscoverAnchor extends TopicAnchor {
  /** Trimmed, non-empty content pillars taken from the discover context. */
  pillars: string[];
  /** Suggested tags that passed the specificity filter in `buildDiscoverAnchor`. */
  specificTags: string[];
}
|
||
|
||
/** Working record for one discovered account while results are merged and ranked. */
interface AccountCandidate {
  /** Normalized handle (original casing kept; maps are keyed by its lower-case form). */
  username: string;
  /** Accumulated weight across every sighting of this handle. */
  score: number;
  /** Best topic-relevance score seen for this handle. */
  relevance: number;
  /** Score returned by the AI relevance filter, when it produced a verdict. */
  aiScore?: number;
  /** Explanation returned by the AI relevance filter, when it produced a verdict. */
  aiReason?: string;
  /** Human-readable text describing why the account was picked up. */
  reason: string;
  /** Which discovery channel produced the candidate. */
  source: SimilarAccount["source"];
  /** Normalized URL of a post associated with the candidate, if any. */
  postUrl?: string;
  /** Hashtags extracted from the text the candidate was found in. */
  tags?: string[];
}
|
||
|
||
/**
 * Caller-supplied description of the topic to discover accounts for.
 * It is passed to `buildTopicAnchor` and, in part, to the AI relevance filter.
 */
interface DiscoverContext {
  /** Topic label; forwarded to the AI relevance filter. */
  label: string;
  /** Search query for the topic; forwarded to the AI relevance filter. */
  query: string;
  /** Optional free-text brief; its first characters may be appended to a web query. */
  brief?: string | null;
  /** Optional product description. NOTE(review): not read directly in this file. */
  productContext?: string | null;
  /** Optional content pillars; used for extra web queries and the AI filter. */
  pillars?: string[];
  /** Optional tag suggestions; filtered down to topic-specific tags. */
  suggestedTags?: string[];
  /** Optional exclusions; forwarded to the AI relevance filter. */
  exclusions?: string[];
}
|
||
|
||
/**
 * Decides whether a raw handle is acceptable as a discovered account.
 *
 * The handle is normalized first, then rejected when it is empty, shorter than
 * 2 or longer than 30 characters, listed in `RESERVED_USERNAMES`, or matches
 * `creator_<digits>` / `example_*` (presumably placeholder handles — confirm).
 * Whatever remains must consist only of letters, digits, `.` and `_`.
 *
 * @param username Raw handle, with or without decoration handled by `normalizeUsername`.
 * @returns `true` when the normalized handle passes every check.
 */
function isValidUsername(username: string): boolean {
  const handle = normalizeUsername(username);
  if (!handle) return false;

  const lengthOk = handle.length >= 2 && handle.length <= 30;
  if (!lengthOk) return false;

  if (RESERVED_USERNAMES.has(handle.toLowerCase())) return false;
  if (/^(creator_\d+|example_.*)$/i.test(handle)) return false;

  return /^[a-zA-Z0-9._]+$/.test(handle);
}
|
||
|
||
/**
 * Builds the anchor used by every discovery step: the base topic anchor plus
 * cleaned pillars and the subset of suggested tags that are specific to the topic.
 *
 * A tag is kept when it is at least 4 characters, is not in
 * `BROAD_TAG_BLOCKLIST`, and one of the following holds:
 *  - it contains, or is contained in, the (non-empty) core phrase;
 *  - when there are 2+ required concepts, it contains at least two of them;
 *  - otherwise its `scoreTopicRelevance` is at least 6.
 *
 * @param ctx Topic description supplied by the caller.
 * @returns The base anchor extended with `pillars` and `specificTags`.
 */
function buildDiscoverAnchor(ctx: DiscoverContext): DiscoverAnchor {
  const base = buildTopicAnchor(ctx);
  const pillars = (ctx.pillars ?? []).map((p) => p.trim()).filter(Boolean);

  // Trim BEFORE stripping the leading "@", otherwise " @tag" keeps its "@".
  // Tags shorter than 4 characters can never qualify, so drop them up front.
  const tagCandidates = (ctx.suggestedTags ?? [])
    .map((t) => t.trim().replace(/^@/, "").trim())
    .filter((t) => t.length >= 4);

  // An empty core phrase must not match: `"x".includes("")` is always true and
  // would mark every tag as topic-specific.
  const hasCorePhrase = base.corePhrase.length > 0;

  const specificTags = tagCandidates.filter((tag) => {
    if (BROAD_TAG_BLOCKLIST.has(tag)) return false;

    if (
      hasCorePhrase &&
      (tag.includes(base.corePhrase) || base.corePhrase.includes(tag))
    ) {
      return true;
    }

    if (base.requiredConcepts.length >= 2) {
      const matched = base.requiredConcepts.filter((c) => tag.includes(c));
      return matched.length >= 2;
    }

    return scoreTopicRelevance(tag, base) >= 6;
  });

  return { ...base, pillars, specificTags };
}
|
||
|
||
/**
 * Maps a ranked candidate to a coarse confidence label.
 *
 * - `"high"`: score above 20 AND the candidate came from `"threads"` or `"scan"`.
 * - `"medium"`: score above 10, OR an AI score above 0.5.
 * - `"low"`: everything else.
 *
 * @param candidate Candidate after merging (and optional AI scoring).
 * @returns The confidence label for the candidate.
 */
function assignConfidence(candidate: AccountCandidate): AccountConfidence {
  const fromFirstPartySource =
    candidate.source === "threads" || candidate.source === "scan";

  if (candidate.score > 20 && fromFirstPartySource) {
    return "high";
  }

  // A missing AI score counts as 0, so it can never lift a candidate on its own.
  const aiScore = candidate.aiScore ?? 0;
  if (candidate.score > 10 || aiScore > 0.5) {
    return "medium";
  }

  return "low";
}
|
||
|
||
/**
 * Collects hashtags from free text.
 *
 * A hashtag is `#` followed by 2–24 word characters or characters in the
 * U+4E00–U+9FFF range. The leading `#` is removed, duplicates are dropped
 * (first occurrence wins), and at most 6 tags are returned.
 *
 * @param text Text to scan.
 * @returns Up to 6 distinct tags, in order of first appearance.
 */
function extractTagsFromText(text: string): string[] {
  const hashtags = text.match(/#[\w\u4e00-\u9fff]{2,24}/g) ?? [];
  // Every match starts with exactly one "#", so slicing it off is equivalent
  // to stripping the prefix.
  const distinct = new Set(hashtags.map((tag) => tag.slice(1)));
  return [...distinct].slice(0, 6);
}
|
||
|
||
/**
 * Returns every handle that appears in a Threads profile URL inside `text`.
 *
 * Handles are returned in order of appearance and duplicates are kept.
 *
 * @param text Text to scan (for example a link, title and snippet joined together).
 * @returns The captured handles, without the leading `@`.
 */
function extractUsernamesFromText(text: string): string[] {
  // Work on a private copy: the shared regex is global, so its `lastIndex`
  // must not leak between calls.
  const pattern = new RegExp(THREADS_PROFILE_RE.source, THREADS_PROFILE_RE.flags);

  const handles: string[] = [];
  for (const hit of text.matchAll(pattern)) {
    const handle = hit[1];
    if (handle !== undefined) handles.push(handle);
  }
  return handles;
}
|
||
|
||
/**
 * Records one sighting of a handle in `map`, keyed by the lower-cased handle.
 *
 * Invalid handles are ignored. For a new handle a candidate is created with
 * `score = weight`. For a known handle the weight is added to its score, the
 * reason is replaced only when this sighting's reason text scores a higher
 * topic relevance, and a non-empty post URL overwrites the stored one.
 *
 * @param map Accumulator, mutated in place.
 * @param username Raw handle of the account.
 * @param params.reason Text explaining the sighting; also scored for relevance.
 * @param params.source Discovery channel of the sighting.
 * @param params.weight Score contribution of this sighting (defaults to 1).
 * @param params.postUrl Optional post URL; normalized before it is stored.
 * @param params.tags Optional hashtags found alongside the handle.
 * @param params.anchor Topic anchor used to score `reason`.
 */
function addCandidate(
  map: Map<string, AccountCandidate>,
  username: string,
  params: {
    reason: string;
    source: SimilarAccount["source"];
    weight?: number;
    postUrl?: string;
    tags?: string[];
    anchor: TopicAnchor;
  }
) {
  const clean = normalizeUsername(username);
  if (!isValidUsername(clean)) return;

  const relevance = scoreTopicRelevance(params.reason, params.anchor);
  const key = clean.toLowerCase();
  const weight = params.weight ?? 1;
  const postUrl = params.postUrl
    ? normalizeThreadsPostUrl(params.postUrl) ?? undefined
    : undefined;

  const existing = map.get(key);
  if (!existing) {
    map.set(key, {
      username: clean,
      score: weight,
      relevance,
      reason: params.reason,
      source: params.source,
      postUrl,
      tags: params.tags,
    });
    return;
  }

  existing.score += weight;
  if (relevance > existing.relevance) {
    existing.relevance = relevance;
    existing.reason = params.reason;
  }
  if (postUrl) existing.postUrl = postUrl;
  // Backfill tags when the first sighting carried none; otherwise tags found
  // on later sightings would be lost for good.
  if (!existing.tags?.length && params.tags?.length) {
    existing.tags = params.tags;
  }
}
|
||
|
||
/**
 * Builds the list of web-search queries for a topic.
 *
 * The base set combines the double-quoted core phrase with `site:threads.net`
 * and several fixed keyword variants. One extra query is added when the first
 * 24 characters of the trimmed brief are at least 4 characters long and score
 * at least 6 for topic relevance. The first two pillars each add two queries
 * under the same length and relevance thresholds.
 *
 * @param anchor Discover anchor providing the core phrase and pillars.
 * @param brief Optional free-text brief.
 * @returns Distinct queries in insertion order.
 */
function buildWebSearchQueries(anchor: DiscoverAnchor, brief?: string | null): string[] {
  const quoted = `"${anchor.corePhrase}"`;
  const queries = [
    `site:threads.net ${quoted}`,
    `threads ${quoted} 帳號`,
    `threads ${quoted} 創作者`,
    `site:threads.net ${quoted} 推薦`,
    `site:threads.net ${quoted} 心得`,
    `${quoted} 創作者 threads`,
    `${quoted} site:threads.net`,
  ];

  const briefHint = brief?.trim().slice(0, 24) ?? "";
  if (briefHint.length >= 4 && scoreTopicRelevance(briefHint, anchor) >= 6) {
    queries.push(`threads ${quoted} ${briefHint}`);
  }

  for (const pillar of anchor.pillars.slice(0, 2)) {
    const usable = pillar.length >= 4 && scoreTopicRelevance(pillar, anchor) >= 6;
    if (!usable) continue;
    queries.push(`site:threads.net "${pillar}"`, `threads "${pillar}" 推薦`);
  }

  return [...new Set(queries)];
}
|
||
|
||
/**
 * Builds the queries sent to the in-app Threads search.
 *
 * The core phrase is used when it is at least 4 characters long, followed by
 * each specific tag that is 4–14 characters long and scores at least 6 for
 * topic relevance. Duplicates are removed and only the first 3 are kept.
 *
 * @param anchor Discover anchor providing the core phrase and specific tags.
 * @returns At most 3 distinct queries.
 */
function buildThreadsSearchQueries(anchor: DiscoverAnchor): string[] {
  const queries: string[] = [];

  if (anchor.corePhrase.length >= 4) {
    queries.push(anchor.corePhrase);
  }

  for (const tag of anchor.specificTags) {
    const lengthOk = tag.length >= 4 && tag.length <= 14;
    if (lengthOk && scoreTopicRelevance(tag, anchor) >= 6) {
      queries.push(tag);
    }
  }

  return [...new Set(queries)].slice(0, 3);
}
|
||
|
||
/**
 * Discovers candidate accounts from web-search results.
 *
 * Every query from `buildWebSearchQueries` is run concurrently. A query that
 * fails contributes no results (deliberate best-effort: one failing query must
 * not abort discovery). For each result, the link, title and snippet are
 * scanned for Threads profile URLs and each handle found is recorded with a
 * weight of:
 *   3 if the result link itself contains `/@<handle>`, else 2
 *   + (topic relevance of the result) / 4
 *   + 0.5 if the result's provider is `"brave"`.
 *
 * @param anchor Discover anchor for the topic.
 * @param brief Optional brief forwarded to the query builder.
 * @returns One candidate per distinct handle.
 */
async function discoverFromWebSearch(
  anchor: DiscoverAnchor,
  brief?: string | null
): Promise<AccountCandidate[]> {
  const map = new Map<string, AccountCandidate>();
  const queries = buildWebSearchQueries(anchor, brief);

  const perQueryLimit = 15;
  const results = await Promise.all(
    queries.map((q) =>
      searchWebThorough(q, perQueryLimit, {
        patrolMode: true,
        priority: "high",
        threadsOnly: true,
      }).catch(() => ({ results: [] }))
    )
  );

  for (const batch of results) {
    for (const item of batch.results) {
      const blob = `${item.link} ${item.title} ${item.snippet}`;
      const usernames = extractUsernamesFromText(blob);
      if (usernames.length === 0) continue;

      // Everything below depends only on the result item, not on the handle,
      // so compute it once per item.
      const relevance = scoreTopicRelevance(blob, anchor);
      const tags = extractTagsFromText(blob);
      const reason = (
        item.snippet.trim() ||
        item.title.trim() ||
        `在「${anchor.corePhrase}」相關網路搜尋結果中找到`
      ).slice(0, 160);
      const postUrl = normalizeThreadsPostUrl(item.link) ?? undefined;
      const providerBonus = item.provider === "brave" ? 0.5 : 0;

      for (const username of usernames) {
        const linkBonus = item.link.includes(`/@${username}`) ? 3 : 2;
        addCandidate(map, username, {
          reason,
          source: "web",
          weight: linkBonus + relevance / 4 + providerBonus,
          postUrl,
          tags,
          anchor,
        });
      }
    }
  }

  return Array.from(map.values());
}
|
||
|
||
/**
 * Discovers candidate accounts from the in-app Threads search.
 *
 * Queries are run one after another. A query that throws is skipped
 * (deliberate best-effort), including any posts of that query not yet
 * processed. Each post with an author adds a sighting weighted
 *   2 + (topic relevance of the post text) / 2 + min(likeCount / 100, 2).
 *
 * @param storageState Session state handed to the Threads search helper.
 * @param anchor Discover anchor for the topic.
 * @param limit Maximum number of candidates to return (default 5).
 * @returns The top candidates ordered by `score + relevance`, descending.
 */
async function discoverFromThreadsSearch(
  storageState: string,
  anchor: DiscoverAnchor,
  limit = 5
): Promise<AccountCandidate[]> {
  const map = new Map<string, AccountCandidate>();

  for (const q of buildThreadsSearchQueries(anchor)) {
    try {
      const posts = await search(storageState, q, 15);
      for (const post of posts) {
        if (!post.authorName) continue;

        const postText = post.text.trim();
        const relevance = scoreTopicRelevance(postText, anchor);
        const likeBonus = Math.min((post.likeCount ?? 0) / 100, 2);

        addCandidate(map, post.authorName, {
          reason:
            postText.slice(0, 100) ||
            `在 Threads 搜尋「${q}」的熱門貼文中出現`,
          source: "threads",
          weight: 2 + relevance / 2 + likeBonus,
          postUrl: post.permalink,
          anchor,
        });
      }
    } catch {
      // Best-effort: a failing query must not abort the remaining ones.
      continue;
    }
  }

  return Array.from(map.values())
    .sort((a, b) => b.score + b.relevance - (a.score + a.relevance))
    .slice(0, limit);
}
|
||
|
||
/**
 * Orders candidates from best to worst.
 *
 * The ranking value is `score + 2 * relevance + 3 * aiScore`, where a missing
 * AI score counts as 0. The input array is left untouched; a sorted copy is
 * returned, so callers holding the original array do not see it reordered.
 *
 * @param candidates Candidates to rank.
 * @returns A new array sorted by ranking value, descending.
 */
function rankCandidates(candidates: AccountCandidate[]): AccountCandidate[] {
  const rankValue = (c: AccountCandidate): number =>
    c.score + c.relevance * 2 + (c.aiScore ?? 0) * 3;

  // Copy first: Array.prototype.sort sorts in place and would mutate the
  // caller's array.
  return [...candidates].sort((a, b) => rankValue(b) - rankValue(a));
}
|
||
|
||
/**
 * Enriches candidates with verdicts from the AI relevance filter.
 *
 * Only the first 18 candidates (in the order given) are sent to the filter.
 * A candidate with a verdict gets `aiScore` and `aiReason`; when the verdict
 * marks it relevant, its relevance is raised by `round(score * 4)`. Candidates
 * without a verdict are returned unchanged. No candidate is removed here.
 *
 * Errors thrown by the filter are not caught and propagate to the caller.
 *
 * @param candidates Candidates, normally already ranked.
 * @param ctx Topic description forwarded to the filter.
 * @param anchor Discover anchor supplying the required concepts.
 * @param ai Provider settings; when omitted the candidates are returned as-is.
 * @returns A new array, or the input array itself when no filtering happens.
 */
async function applyAiRelevanceFilter(
  candidates: AccountCandidate[],
  ctx: DiscoverContext,
  anchor: DiscoverAnchor,
  ai?: {
    aiProvider: string;
    aiModel: string;
    apiKeys?: ProviderApiKeys;
  }
): Promise<AccountCandidate[]> {
  if (!ai || candidates.length === 0) return candidates;

  // Cap on how many candidates are sent to the model in one request.
  const maxItemsForAi = 18;

  const items: DiscoverFilterItem[] = candidates.slice(0, maxItemsForAi).map((c) => ({
    id: c.username.toLowerCase(),
    text: c.reason,
    username: c.username,
    source: c.source ?? "web",
    tags: c.tags,
  }));

  const verdicts = await filterDiscoverItemsWithAi({
    label: ctx.label,
    query: ctx.query,
    brief: ctx.brief,
    exclusions: ctx.exclusions,
    pillars: ctx.pillars,
    requiredConcepts: anchor.requiredConcepts,
    items,
    aiProvider: ai.aiProvider,
    aiModel: ai.aiModel,
    apiKeys: ai.apiKeys,
  });

  return candidates.map((c) => {
    // Verdicts are looked up by the same lower-cased id that was sent.
    const verdict = verdicts.get(c.username.toLowerCase());
    if (!verdict) return c;

    const relevanceBoost = verdict.relevant ? Math.round(verdict.score * 4) : 0;
    return {
      ...c,
      aiScore: verdict.score,
      aiReason: verdict.reason,
      relevance: c.relevance + relevanceBoost,
    };
  });
}
|
||
|
||
/**
 * Consistency-check hook for candidates.
 *
 * Currently a pass-through: it performs no verification and returns the very
 * same array it was given.
 *
 * @param candidates Candidates to verify.
 * @returns The input array, unchanged.
 */
function verifyAccountConsistency(
  candidates: AccountCandidate[]
): AccountCandidate[] {
  return candidates;
}
|
||
|
||
/**
 * Converts the first `limit` candidates into the public `SimilarAccount` shape.
 *
 * When an AI reason exists it is appended to the candidate's reason inside
 * full-width parentheses. A negative `limit` yields an empty list (a raw
 * negative `slice` end would instead drop items from the tail).
 *
 * @param candidates Candidates in final ranking order.
 * @param limit Maximum number of accounts to return.
 * @returns The converted accounts, in the same order.
 */
function toSimilarAccounts(candidates: AccountCandidate[], limit: number): SimilarAccount[] {
  const take = Math.max(0, limit);

  return candidates.slice(0, take).map((c) => {
    const reason = c.aiReason ? `${c.reason}(AI:${c.aiReason})` : c.reason;
    return {
      username: c.username,
      reason,
      source: c.source,
      profileUrl: threadsProfileUrl(c.username) ?? undefined,
      postUrl: c.postUrl,
      confidence: assignConfidence(c),
    };
  });
}
|
||
|
||
/**
 * Finds Threads accounts similar to a topic.
 *
 * Steps:
 *  1. Build the discover anchor from the parameters.
 *  2. Run the Threads search (only when `storageState` is given) and the web
 *     search concurrently; they do not depend on each other.
 *  3. Merge by lower-cased handle. Threads candidates go in first, and a web
 *     candidate for a known handle adds its score, keeps the higher relevance,
 *     overwrites the post URL when it has one, and replaces the reason when
 *     its reason is longer.
 *  4. Rank, optionally enrich with the AI relevance filter (only when both
 *     `aiProvider` and `aiModel` are given), verify, and rank again.
 *
 * @param params Topic description, optional Threads session state, result
 *   limit (default 8) and optional AI provider settings.
 * @returns Up to `limit` similar accounts, best first.
 */
export async function discoverSimilarAccounts(params: {
  label: string;
  query: string;
  brief?: string | null;
  productContext?: string | null;
  pillars?: string[];
  suggestedTags?: string[];
  exclusions?: string[];
  storageState?: string | null;
  limit?: number;
  aiProvider?: string;
  aiModel?: string;
  apiKeys?: ProviderApiKeys;
}): Promise<SimilarAccount[]> {
  const limit = params.limit ?? 8;
  const anchor = buildDiscoverAnchor(params);

  // The two sources are independent, so fetch them in parallel.
  const [threadsCandidates, webCandidates] = await Promise.all([
    params.storageState
      ? discoverFromThreadsSearch(params.storageState, anchor, limit)
      : Promise.resolve<AccountCandidate[]>([]),
    discoverFromWebSearch(anchor, params.brief),
  ]);

  const merged = new Map<string, AccountCandidate>();
  for (const c of threadsCandidates) {
    merged.set(c.username.toLowerCase(), c);
  }

  for (const c of webCandidates) {
    const key = c.username.toLowerCase();
    const existing = merged.get(key);
    if (!existing) {
      merged.set(key, c);
      continue;
    }

    existing.score += c.score;
    existing.relevance = Math.max(existing.relevance, c.relevance);
    if (c.postUrl) existing.postUrl = c.postUrl;
    if (c.reason.length > existing.reason.length) existing.reason = c.reason;
    // Threads candidates are created without tags; keep the web-derived ones
    // so the AI filter can still see them.
    if (!existing.tags?.length && c.tags?.length) existing.tags = c.tags;
  }

  // Rank before the AI step: the filter only looks at the leading candidates.
  let sorted = rankCandidates(Array.from(merged.values()));

  if (params.aiProvider && params.aiModel) {
    sorted = await applyAiRelevanceFilter(sorted, params, anchor, {
      aiProvider: params.aiProvider,
      aiModel: params.aiModel,
      apiKeys: params.apiKeys,
    });
    sorted = rankCandidates(sorted);
  }

  sorted = verifyAccountConsistency(sorted);
  sorted = rankCandidates(sorted);

  return toSimilarAccounts(sorted, limit);
}
|