/**
 * Web-search based discovery of Threads posts.
 *
 * Builds `site:threads.com` style queries from keyword tags, similar accounts
 * and (in placement mode) content-band phrases, runs them through
 * `searchWebThorough`, and converts the result links into scored
 * `WebDiscoveredPost` records that can be merged with posts from other
 * scan sources.
 */
import "server-only";

import { computePlacementScore, computeScore, type RankedPost } from "@/lib/ranking";
import {
  DEFAULT_WEB_SEARCH_MAX_QUERIES,
  formatGoogleAfterDate,
  PLACEMENT_WEB_SEARCH_MAX_AGE_DAYS,
  PLACEMENT_WEB_SEARCH_MAX_QUERIES,
  PLACEMENT_WEB_SEARCH_TARGET_POSTS,
} from "@/lib/scan-recency";
import {
  isBraveSearchConfigured,
  type BraveWebSearchOptions,
} from "@/lib/services/web-search";
import type { KeywordPriority } from "@/lib/search";
import {
  type ContentBandInput,
  isInContentBand,
} from "@/lib/research-content-band";
import { hasPlacementIntent, looksLikeCasualChat } from "@/lib/topic-anchor";
import type { SearchIntent, SearchTagType } from "@/lib/types/research";
import {
  isAccountTag,
  normalizeThreadsPostUrl,
  normalizeUsername,
  type SimilarAccount,
} from "@/lib/types/research";
import { searchWebThorough } from "@/lib/services/web-search";
import { runWithConcurrency } from "@/lib/utils/concurrency";

/** Where a scanned post was discovered. */
export type ScanPostSource = "account" | "keyword" | "web";

/** A ranked post annotated with the tag that found it and its scan source. */
export type WebDiscoveredPost = RankedPost & {
  searchTag?: string;
  scanSource?: ScanPostSource;
};

/** Tie-break order when the same post is found by several sources (higher wins). */
const SOURCE_PRIORITY: Record<ScanPostSource, number> = {
  account: 3,
  keyword: 2,
  web: 1,
};

/** Multiplier applied to a post's score according to its scan source. */
const SOURCE_SCORE_BOOST: Record<ScanPostSource, number> = {
  account: 1.35,
  keyword: 1,
  web: 0.85,
};

/** Matches a normalized Threads permalink; captures the username and the post id. */
const THREADS_POST_URL_PATTERN = /threads\.com\/@([^/]+)\/post\/([^/?#]+)/i;

/**
 * Dedupe key for a post: the permalink, else the external id, else the author
 * name plus the first 120 characters of the trimmed text.
 */
function postKey(post: {
  permalink?: string;
  externalId?: string;
  authorName?: string;
  text: string;
}): string {
  return (
    post.permalink ??
    post.externalId ??
    `${post.authorName ?? ""}:${post.text.trim().slice(0, 120)}`
  );
}

/** Returns a copy of `post` tagged with `scanSource` and with its score boosted for that source. */
function withSource(
  post: RankedPost & { searchTag?: string },
  scanSource: ScanPostSource
): WebDiscoveredPost {
  return {
    ...post,
    scanSource,
    score: post.score * SOURCE_SCORE_BOOST[scanSource],
  };
}

/** Per-tag metadata used to build more precise search queries. */
interface TagSearchMeta {
  searchIntent?: SearchIntent;
  searchType?: SearchTagType;
}

/** Builds exactly one high-signal query per keyword to keep Brave quota usage low. */
function buildPlacementKeywordQueries(tag: string, meta?: TagSearchMeta): string[] {
  const after = formatGoogleAfterDate(PLACEMENT_WEB_SEARCH_MAX_AGE_DAYS);
  const isNeedTag =
    meta?.searchIntent === "需求" ||
    meta?.searchIntent === "求助" ||
    meta?.searchIntent === "痛點";
  const intent = isNeedTag ? "求推薦" : "請問";
  return [`site:threads.com "${tag}" ${intent} after:${after}`];
}

/**
 * Resolves the default query cap. A positive integer in the
 * `SCAN_BRAVE_MAX_QUERIES` environment variable overrides the built-in
 * default, but is itself limited to 20 (placement mode) or 30 (otherwise).
 */
function resolveBraveQueryCap(placementMode: boolean): number {
  const raw = process.env.SCAN_BRAVE_MAX_QUERIES?.trim();
  const parsed = raw ? Number.parseInt(raw, 10) : NaN;
  if (Number.isFinite(parsed) && parsed > 0) {
    return Math.min(parsed, placementMode ? 20 : 30);
  }
  return placementMode ? PLACEMENT_WEB_SEARCH_MAX_QUERIES : DEFAULT_WEB_SEARCH_MAX_QUERIES;
}

/** Builds the search queries for one keyword tag, depending on the mode. */
function buildKeywordQueries(
  tag: string,
  placementMode: boolean,
  meta?: TagSearchMeta
): string[] {
  if (placementMode) return buildPlacementKeywordQueries(tag, meta);
  return [`site:threads.com "${tag}"`, `site:threads.net "${tag}"`];
}

/**
 * In placement mode, keeps only text that is not casual chat, shows placement
 * intent, and (when a content band is given) falls inside that band.
 * Outside placement mode every text passes.
 */
function passesPlacementWebFilter(
  text: string,
  placementMode: boolean,
  contentBand?: ContentBandInput
): boolean {
  if (!placementMode) return true;
  if (looksLikeCasualChat(text)) return false;
  if (!hasPlacementIntent(text)) return false;
  if (contentBand && !isInContentBand(text, contentBand)) return false;
  return true;
}

/** Scores a discovered post with the placement scorer or the general scorer. */
function scoreDiscoveredPost(
  raw: {
    text: string;
    permalink: string;
    authorName: string;
    externalId: string;
    postedAt?: Date;
    likeCount?: number;
    replyCount?: number;
  },
  placementMode: boolean
): number {
  return placementMode ? computePlacementScore(raw) : computeScore(raw);
}

/**
 * Converts one search result into a post.
 *
 * Returns `null` when the link does not normalize to a Threads post permalink
 * or when the combined title/snippet text is shorter than 8 characters.
 */
function parsePostFromUrl(
  link: string,
  title: string,
  snippet: string,
  searchTag: string,
  scanSource: ScanPostSource = "web",
  placementMode = false
): WebDiscoveredPost | null {
  const permalink = normalizeThreadsPostUrl(link);
  if (!permalink) return null;

  const match = permalink.match(THREADS_POST_URL_PATTERN);
  if (!match) return null;

  // The search result only gives a title and a snippet; join whichever is non-empty.
  const text = [title.trim(), snippet.trim()].filter(Boolean).join(" — ");
  if (text.length < 8) return null;

  const authorName = match[1];
  const externalId = match[2];
  const raw = { text, permalink, authorName, externalId };

  return withSource(
    {
      ...raw,
      score: scoreDiscoveredPost(raw, placementMode),
      searchTag,
    },
    scanSource
  );
}

/**
 * Turns the reference post attached to an account discovered in the research
 * map into a high-quality seed post.
 *
 * Returns `null` when the account has no `postUrl` or the URL is not a Threads
 * post permalink.
 *
 * NOTE(review): the post text is taken from `account.reason` (truncated to 280
 * characters) rather than from the post itself — confirm this is intended.
 */
export function postFromSimilarAccountSeed(account: SimilarAccount): WebDiscoveredPost | null {
  if (!account.postUrl) return null;

  const permalink = normalizeThreadsPostUrl(account.postUrl);
  if (!permalink) return null;

  const match = permalink.match(THREADS_POST_URL_PATTERN);
  if (!match) return null;

  // Prefer the account's own username; fall back to the one in the permalink.
  const username = normalizeUsername(account.username) || match[1];
  const tag = `@${username}`;
  const text = (account.reason || `相似帳號 @${username} 的參考貼文`).slice(0, 280);
  const raw = {
    text,
    permalink,
    authorName: username,
    externalId: match[2],
  };

  return withSource(
    {
      ...raw,
      score: computeScore(raw) * 1.2,
      searchTag: tag,
    },
    "account"
  );
}

/** Options for {@link discoverPostsViaWebSearch}. */
export interface WebDiscoverOptions {
  /** Result limit passed to each search call (default 10). */
  perQueryLimit?: number;
  /** Placement mode: prefer help-seeking / recommendation-seeking posts and restrict to recent ones via `after:`. */
  placementMode?: boolean;
  /** Number of queries run in parallel (default 2; values below 1 are treated as 1). */
  concurrency?: number;
  /** Each tag's searchIntent / searchType, used to build more precise queries. */
  tagMeta?: Map<string, TagSearchMeta>;
  /** Placement mode: audience questions / content pillars / exclusions from the research map. */
  contentBand?: ContentBandInput;
  /** Maximum number of web-search queries to issue; takes precedence over `braveQueryBudget`. */
  maxQueries?: number;
  /** Stop once this many posts have been found. */
  targetPosts?: number;
  /** Query budget used when `maxQueries` is not set; 0 disables the web search. */
  braveQueryBudget?: number;
  /** MVP: only `"high"` priority keywords use Brave. */
  keywordPriority?: KeywordPriority;
  /** Receives human-readable progress messages. */
  onProgress?: (message: string) => void | Promise<void>;
}

/** Builds the search options; priority defaults to "high" in placement mode and "medium" otherwise. */
function resolveBraveSearchOptions(
  placementMode: boolean,
  keywordPriority?: KeywordPriority
): BraveWebSearchOptions {
  const priority = keywordPriority ?? (placementMode ? "high" : "medium");
  return {
    patrolMode: true,
    priority,
    threadsOnly: true,
  };
}

/**
 * Finds Threads post links through Brave Search, as a supplement when the
 * Threads API / crawler does not return enough.
 *
 * Account tags are ignored here. The search is skipped (returning `[]`) when
 * there is nothing to search for, when the resolved priority is not `"high"`,
 * or when the query budget is zero or negative. Queries run in batches until
 * the jobs are exhausted, the query budget is used up, or `targetPosts` posts
 * have been collected. The number of queries issued never exceeds the budget.
 *
 * @param tags Search tags; entries recognized by `isAccountTag` are dropped.
 * @param options See {@link WebDiscoverOptions}.
 * @returns Deduplicated posts sorted by descending score.
 */
export async function discoverPostsViaWebSearch(
  tags: string[],
  options?: WebDiscoverOptions
): Promise<WebDiscoveredPost[]> {
  const perQueryLimit = options?.perQueryLimit ?? 10;
  const placementMode = options?.placementMode ?? false;
  const contentBand = options?.contentBand;

  // A concurrency of 0, a negative number or NaN would yield empty batches and
  // the batch loop below would never advance, so force a whole number >= 1.
  const requestedConcurrency = options?.concurrency ?? 2;
  const concurrency = Number.isNaN(requestedConcurrency)
    ? 2
    : Math.max(1, Math.floor(requestedConcurrency));

  // Floor the budget so the "remaining budget" used for batch sizing is always
  // a whole number of queries.
  const maxQueries = Math.floor(
    options?.maxQueries ??
      options?.braveQueryBudget ??
      resolveBraveQueryCap(placementMode)
  );
  const targetPosts =
    options?.targetPosts ?? (placementMode ? PLACEMENT_WEB_SEARCH_TARGET_POSTS : 30);
  const onProgress = options?.onProgress;
  const braveOptions = resolveBraveSearchOptions(placementMode, options?.keywordPriority);

  const keywordTags = tags.filter((t) => !isAccountTag(t));
  if (keywordTags.length === 0 && !contentBand) return [];

  if (braveOptions.priority !== "high") {
    await onProgress?.("略過 Brave 網搜(僅 high priority keyword 使用 Brave)");
    return [];
  }

  if (maxQueries <= 0) {
    await onProgress?.("已略過 Brave 網搜(額度保護)");
    return [];
  }

  type QueryJob = { tag: string; query: string };
  const jobs: QueryJob[] = [];

  for (const tag of keywordTags) {
    const meta = options?.tagMeta?.get(tag);
    for (const query of buildKeywordQueries(tag, placementMode, meta)) {
      jobs.push({ tag, query });
    }
  }

  if (placementMode && contentBand) {
    // Add up to two audience questions and one pillar as extra phrase queries,
    // keeping only phrases of 4 to 16 characters.
    const after = formatGoogleAfterDate(PLACEMENT_WEB_SEARCH_MAX_AGE_DAYS);
    const bandPhrases = [
      ...contentBand.questions.slice(0, 2),
      ...contentBand.pillars.slice(0, 1),
    ]
      .map((p) => p.trim())
      .filter((p) => p.length >= 4 && p.length <= 16);
    for (const phrase of bandPhrases) {
      jobs.push({
        tag: phrase,
        query: `site:threads.com "${phrase}" 求推薦 after:${after}`,
      });
    }
  }

  const seen = new Set<string>();
  const posts: WebDiscoveredPost[] = [];
  let queriesUsed = 0;
  let jobIndex = 0;

  /** Appends posts that have not been seen yet. */
  const mergeFound = (found: WebDiscoveredPost[]): void => {
    for (const post of found) {
      const key = postKey(post);
      if (seen.has(key)) continue;
      seen.add(key);
      posts.push(post);
    }
  };

  /** Runs one query and returns the posts that parse and pass the placement filter. */
  const runJob = async (job: QueryJob): Promise<WebDiscoveredPost[]> => {
    try {
      const { results } = await searchWebThorough(job.query, perQueryLimit, braveOptions);
      const found: WebDiscoveredPost[] = [];
      for (const item of results) {
        const post = parsePostFromUrl(
          item.link,
          item.title,
          item.snippet,
          job.tag,
          "web",
          placementMode
        );
        if (post && passesPlacementWebFilter(post.text, placementMode, contentBand)) {
          found.push(post);
        }
      }
      return found;
    } catch {
      // Best effort: a single failed query must not abort the whole scan.
      return [];
    }
  };

  while (
    jobIndex < jobs.length &&
    queriesUsed < maxQueries &&
    posts.length < targetPosts
  ) {
    // Never take more jobs than the remaining budget allows; the loop condition
    // guarantees at least one query is left, so the batch is never empty.
    const batchSize = Math.min(concurrency, maxQueries - queriesUsed);
    const chunk = jobs.slice(jobIndex, jobIndex + batchSize);
    jobIndex += chunk.length;
    queriesUsed += chunk.length;

    const batches = await runWithConcurrency(chunk, runJob, {
      concurrency,
      staggerMs: placementMode ? [600, 1400] : [2000, 5000],
    });
    mergeFound(batches.flat());

    await onProgress?.(
      `網搜 ${queriesUsed}/${Math.min(maxQueries, jobs.length)} 次 · 已找到 ${posts.length} 篇` +
        (posts.length >= targetPosts ? "(達標,停止)" : "")
    );
  }

  if (posts.length === 0 && !isBraveSearchConfigured()) {
    await onProgress?.("未設定 BRAVE_SEARCH_API_KEY,請以 Threads API/瀏覽器海巡為主");
  }

  return posts.sort((a, b) => b.score - a.score);
}

/**
 * Searches `site:threads.com/@username` for each similar account, so
 * account-oriented posts can still be collected when no browser is available.
 *
 * Returns `[]` without searching when the resolved priority is not `"high"`.
 * Queries are issued one at a time; a failing query is skipped.
 *
 * @returns Deduplicated posts sorted by descending score.
 */
export async function discoverPostsFromSimilarAccounts(
  accounts: SimilarAccount[],
  options?: {
    perAccountLimit?: number;
    placementMode?: boolean;
    keywordPriority?: KeywordPriority;
  }
): Promise<WebDiscoveredPost[]> {
  const placementMode = options?.placementMode ?? false;
  const braveOptions = resolveBraveSearchOptions(placementMode, options?.keywordPriority);
  const useBrave = braveOptions.priority === "high";
  const perAccountLimit = options?.perAccountLimit ?? 10;

  // Without Brave no account yields any post, so there is nothing to iterate over.
  if (!useBrave) return [];

  // The recency suffix is the same for every account; compute it once.
  const after = placementMode
    ? ` after:${formatGoogleAfterDate(PLACEMENT_WEB_SEARCH_MAX_AGE_DAYS)}`
    : "";

  const seen = new Set<string>();
  const posts: WebDiscoveredPost[] = [];

  for (const account of accounts) {
    const username = normalizeUsername(account.username);
    if (!username) continue;
    const tag = `@${username}`;

    // account.reason explains why the account was recommended; it is not post
    // text, so it must not be presented as if it were a post. Only search
    // results are turned into posts here.
    const queries = placementMode
      ? [
          `site:threads.com/@${username} 求推薦${after}`,
          `site:threads.com/@${username} 請益${after}`,
          `site:threads.com/@${username}${after}`,
          `site:threads.net/@${username}${after}`,
        ]
      : [`site:threads.com/@${username}`, `site:threads.net/@${username}`];

    for (const query of queries) {
      try {
        const { results } = await searchWebThorough(query, perAccountLimit, braveOptions);
        for (const item of results) {
          const post = parsePostFromUrl(
            item.link,
            item.title,
            item.snippet,
            tag,
            "account",
            placementMode
          );
          if (!post) continue;
          const key = postKey(post);
          if (seen.has(key)) continue;
          seen.add(key);
          posts.push(post);
        }
      } catch {
        // A failure for a single account must not block the whole scan.
      }
    }
  }

  return posts.sort((a, b) => b.score - a.score);
}

/**
 * Chooses between two records of the same post: the higher-priority source
 * wins, and on equal priority the higher score wins (the incoming post on a
 * tie). Posts without a `scanSource` are treated as "keyword".
 */
function pickPreferredPost(
  existing: WebDiscoveredPost,
  incoming: WebDiscoveredPost
): WebDiscoveredPost {
  const existingPriority = SOURCE_PRIORITY[existing.scanSource ?? "keyword"];
  const incomingPriority = SOURCE_PRIORITY[incoming.scanSource ?? "keyword"];
  if (incomingPriority > existingPriority) return incoming;
  if (incomingPriority < existingPriority) return existing;
  return incoming.score >= existing.score ? incoming : existing;
}

/**
 * Merges two post lists, deduplicating by post key and keeping the preferred
 * record for each post.
 *
 * @param primary Posts from the primary scan.
 * @param supplemental Additional posts to merge in.
 * @param max Maximum number of posts to return; negative values are treated as 0.
 * @returns At most `max` posts sorted by descending score.
 */
export function mergeScanPosts(
  primary: WebDiscoveredPost[],
  supplemental: WebDiscoveredPost[],
  max: number
): WebDiscoveredPost[] {
  const byKey = new Map<string, WebDiscoveredPost>();
  for (const post of [...primary, ...supplemental]) {
    const key = postKey(post);
    const existing = byKey.get(key);
    byKey.set(key, existing ? pickPreferredPost(existing, post) : post);
  }
  // A negative end index would make slice() drop items from the tail instead
  // of returning nothing, so clamp the limit at zero.
  return [...byKey.values()]
    .sort((a, b) => b.score - a.score)
    .slice(0, Math.max(0, max));
}

/** Tags a post with its scan source and applies that source's score boost. */
export function tagPostSource<T extends RankedPost & { searchTag?: string }>(
  post: T,
  scanSource: ScanPostSource
): WebDiscoveredPost {
  return withSource(post, scanSource);
}