import { prisma } from "@/lib/db"; import { getActiveAccountConnectionSettings } from "@/lib/account-connection-settings"; import { isPlacementGoal } from "@/lib/types/topic-goal"; import { assertJobNotCancelled, isJobCancelled } from "@/lib/jobs/cancel"; import { initTaskProgress, setTaskStatus } from "@/lib/jobs/progress"; import { BROWSER_STEP_LABELS, type BrowserCrawlStep } from "@/lib/threads-browser/progress"; import { updateJobProgress } from "@/lib/jobs/progress-server"; import type { JobProgressDetail } from "@/lib/jobs/types"; import { ensureActiveSession } from "@/lib/threads-browser"; import { executeScanTasks, search } from "@/lib/threads-browser/search"; import { getReplyFetchConcurrency } from "@/lib/threads-browser/human-behavior"; import { getRepliesParallel } from "@/lib/threads-browser/replies"; import { keywordSearchViaThreadsApi } from "@/lib/threads-api"; import { getActiveThreadsCredentials } from "@/lib/services/threads-credentials"; import { computePlacementScore, type RankedPost } from "@/lib/ranking"; import { parseResearchMap, parseSelectedTags } from "@/lib/types/research"; import { humanDelay } from "@/lib/utils"; import { runWithConcurrency } from "@/lib/utils/concurrency"; import { contentBandFromResearchMap, isInContentBand, } from "@/lib/research-content-band"; import { hasPlacementIntent, looksLikeCasualChat } from "@/lib/topic-anchor"; import { isPostFreshEnough, PLACEMENT_MAX_POST_AGE_DAYS, PLACEMENT_WEB_SEARCH_MAX_QUERIES, PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS, PLACEMENT_WEB_SEARCH_TARGET_POSTS, } from "@/lib/scan-recency"; import { isBraveSearchConfigured } from "@/lib/services/web-search"; import { modeAllowsBrave, modeAllowsCrawler, modeAllowsThreads, searchSourceModeLabel, } from "@/lib/search/source-mode"; import { buildPlacementScanTasks, buildScanTasks, getSelectedAccountUsernames, hasPlacementSearchSources, hasSelectedAccountTags, resolvePlacementSearchQueries, resolveScanConcurrency, splitScanTasks, } from "./scan-tasks"; 
import {
  discoverPostsFromSimilarAccounts,
  discoverPostsViaWebSearch,
  mergeScanPosts,
  tagPostSource,
} from "./scan-web-discover";

/** A ranked post that may carry the label of the scan task/query that surfaced it. */
type RankedPostWithTag = RankedPost & { searchTag?: string };

/** Parsed research map as returned by `parseResearchMap`. */
type ParsedResearchMap = ReturnType<typeof parseResearchMap>;

/** One entry of the research map's `similarAccounts` list. */
type SimilarAccountTarget = NonNullable<
  NonNullable<ParsedResearchMap>["similarAccounts"]
>[number];

/** Number of top-scoring saved items whose replies are fetched. */
const REPLY_FETCH_TOP_N = 4;

/** Cap handed to `mergeScanPosts` whenever a new batch is merged into the ranked list. */
const MAX_MERGED_POSTS = 90;

/**
 * Re-scores posts with `computePlacementScore` when the topic is a placement topic.
 *
 * @param posts - Posts to (possibly) re-score.
 * @param placementMode - When false the input array is returned as-is.
 * @returns The same array, or copies of each post with `score` replaced.
 */
function rescoreForPlacement(
  posts: RankedPostWithTag[],
  placementMode: boolean
): RankedPostWithTag[] {
  if (!placementMode) return posts;
  return posts.map((post) => ({ ...post, score: computePlacementScore(post) }));
}

/**
 * Applies the placement-mode filters in order: freshness, placement intent
 * (and not casual chat), then the optional content band.
 *
 * @param posts - Candidate posts.
 * @param contentBand - Content band from the research map; skipped when falsy.
 * @returns The surviving posts plus how many posts each filter stage removed.
 */
function applyPlacementPostFilters(
  posts: RankedPostWithTag[],
  contentBand: ReturnType<typeof contentBandFromResearchMap> | null
): { posts: RankedPostWithTag[]; removedStale: number; removedCasual: number; removedBand: number } {
  let ranked = posts;

  const beforeFresh = ranked.length;
  ranked = ranked.filter((post) => isPostFreshEnough(post.postedAt));
  const removedStale = beforeFresh - ranked.length;

  const beforeIntent = ranked.length;
  ranked = ranked.filter(
    (post) => hasPlacementIntent(post.text) && !looksLikeCasualChat(post.text)
  );
  const removedCasual = beforeIntent - ranked.length;

  let removedBand = 0;
  if (contentBand) {
    // Bind to a const so the narrowed (non-null) type survives inside the callback.
    const band = contentBand;
    const beforeBand = ranked.length;
    ranked = ranked.filter((post) => isInContentBand(post.text, band));
    removedBand = beforeBand - ranked.length;
  }

  return { posts: ranked, removedStale, removedCasual, removedBand };
}

/**
 * Picks the research map's similar accounts whose username matches one of the
 * selected account tags (case-insensitive).
 *
 * @param researchMap - Parsed research map (may be nullish).
 * @param selectedTags - Tags selected for this scan.
 * @returns Map keyed by lower-cased username; empty when no account tag is selected.
 */
function collectWebAccountTargets(
  researchMap: ParsedResearchMap,
  selectedTags: string[]
): Map<string, SimilarAccountTarget> {
  const accountTargets = new Map<string, SimilarAccountTarget>();
  if (!hasSelectedAccountTags(selectedTags)) return accountTargets;

  const selected = new Set(
    getSelectedAccountUsernames(selectedTags).map((u) => u.toLowerCase())
  );
  for (const account of researchMap?.similarAccounts ?? []) {
    const username = account.username.trim();
    if (username && selected.has(username.toLowerCase())) {
      accountTargets.set(username.toLowerCase(), account);
    }
  }
  return accountTargets;
}

/**
 * Derives the `searchSource` label stored on a scan.
 *
 * @param primaryCount - Posts attributed to the API/browser sources.
 * @param webCount - Posts found through web discovery.
 * @param usedApi - Whether the official API produced posts.
 * @param usedBrowser - Whether the browser crawler produced posts.
 * @returns `"web"`, `"hybrid"`, `"api"` or `"browser"`.
 */
function resolveSearchSource(
  primaryCount: number,
  webCount: number,
  usedApi: boolean,
  usedBrowser: boolean
): string {
  if (primaryCount === 0 && webCount > 0) return "web";
  if (webCount > 0 || (usedApi && usedBrowser)) return "hybrid";
  return usedApi ? "api" : "browser";
}

/** Callback that receives human-readable progress messages during a scan. */
export interface ScanProgressCallback {
  (message: string): void | Promise<void>;
}

/** Persists the progress detail for a job; does nothing when there is no job id. */
async function persistProgress(jobId: string | undefined, detail: JobProgressDetail) {
  if (!jobId) return;
  await updateJobProgress(jobId, detail);
}

/**
 * Runs a scan for one topic: gathers posts from the enabled sources (official
 * API, Brave web discovery, browser crawler), applies placement filters when the
 * topic is a placement topic, stores the scan and its items, optionally fetches
 * replies for the top items, and finally applies the quality filter.
 *
 * @param topicId - Id of the topic to scan.
 * @param options - Optional tag selection, job id (for cancellation and persisted
 *   progress) and a progress callback.
 * @returns The stored scan with its topic, items and replies.
 * @throws Error when the topic does not exist, when no usable search source or
 *   task is available, when nothing is found, or when a cancellation check throws.
 */
export async function runScanForTopic(
  topicId: string,
  options?: {
    useTags?: boolean;
    selectedTags?: string[];
    jobId?: string;
    onProgress?: ScanProgressCallback;
  }
) {
  const jobId = options?.jobId;

  // Sends the message to the caller and, when a detail object is supplied, persists it for the job.
  const report = async (msg: string, detail?: JobProgressDetail) => {
    await options?.onProgress?.(msg);
    if (detail && jobId) {
      await persistProgress(jobId, detail);
    }
  };

  await assertJobNotCancelled(jobId);

  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) throw new Error("找不到主題");

  const topicQuery: string = topic.query;
  const placementMode = isPlacementGoal(topic.topicGoal);
  const connection = await getActiveAccountConnectionSettings();
  const sourceMode = connection.searchSourceMode;
  const allowThreads = modeAllowsThreads(sourceMode);
  const allowBrave = modeAllowsBrave(sourceMode);
  const allowCrawler = modeAllowsCrawler(sourceMode);
  const braveKeywordPriority =
    placementMode || sourceMode === "brave" || sourceMode === "brave_crawler"
      ? ("high" as const)
      : ("medium" as const);

  await report(`海巡搜尋來源:${searchSourceModeLabel(sourceMode)}`);

  // API first: when the account is connected to the official API, scan through it
  // first; fall back to the browser crawler only on error or when nothing is found.
  const apiCredentials =
    allowThreads && connection.searchViaApi
      ? await getActiveThreadsCredentials().catch(() => null)
      : null;

  // Scraping posts/replies is only allowed while Dev mode is on; otherwise only the
  // official API is used.
  // Note: the official API cannot read replies under other people's posts, so
  // fetching reply material still needs a browser session (i.e. Dev mode).
  const needBrowserForReplies = connection.devMode && connection.scrapeReplies;

  const researchMap = parseResearchMap(topic.researchMap);
  const selectedTags =
    options?.selectedTags && options.selectedTags.length > 0
      ? options.selectedTags
      : parseSelectedTags(topic.selectedTags);
  const useMultiTag = !placementMode && options?.useTags !== false && selectedTags.length > 0;
  const placementSearchQueries = placementMode
    ? resolvePlacementSearchQueries(researchMap, topicQuery, selectedTags)
    : [];

  let ranked: RankedPostWithTag[] = [];
  let scanTags: string[] = [];
  let scanMode = "single";
  let progressDetail: JobProgressDetail = { summary: "準備海巡…", phase: "tasks", tasks: [] };
  let session: Awaited<ReturnType<typeof ensureActiveSession>> | null = null;

  if (placementMode && !hasPlacementSearchSources(researchMap, topicQuery)) {
    throw new Error(
      "請先完成 AI 分析,產出「受眾會問什麼」與「內容支柱」後再海巡"
    );
  }

  const builtTasks = placementMode
    ? buildPlacementScanTasks({ researchMap, seedQuery: topicQuery, selectedTags })
    : useMultiTag
      ? buildScanTasks({
          selectedTags,
          researchMap,
          seedQuery: topic.query,
          topicGoal: topic.topicGoal,
          topicLabel: topic.label,
        })
      : [];
  const allTasks = builtTasks;
  const { keywordTasks, accountTasks } = splitScanTasks(allTasks);
  const contentBand = placementMode ? contentBandFromResearchMap(researchMap) : null;

  if (!placementMode && useMultiTag && selectedTags.length > 0 && builtTasks.length === 0) {
    throw new Error("勾選的標籤均無法用於海巡,請調整選擇後再試");
  }

  const webSearchTags = placementMode
    ? placementSearchQueries
    : useMultiTag && selectedTags.length > 0
      ? selectedTags
      : [topicQuery];
  const accountTargets = collectWebAccountTargets(researchMap, selectedTags);

  // Result of the most recent web discovery call.
  let webPosts: Awaited<ReturnType<typeof discoverPostsViaWebSearch>> = [];
  // Running total over every web discovery call, so that a later empty call
  // does not erase the fact that web discovery contributed posts.
  let webFoundTotal = 0;

  /**
   * Runs web discovery (keyword search, plus similar accounts outside placement
   * mode), merges the result into `ranked` and keeps the "web" progress task
   * up to date. Discovery failures are reported and swallowed (returns 0).
   */
  async function runWebDiscover(
    taskLabel: string,
    webOptions?: {
      summary?: string;
      maxQueries?: number;
      targetPosts?: number;
      braveQueryBudget?: number;
      keywordPriority?: "high" | "medium" | "low";
    }
  ): Promise<number> {
    progressDetail.phase = "web";
    if (!progressDetail.tasks) progressDetail.tasks = [];
    if (!progressDetail.tasks.some((t) => t.id === "web")) {
      progressDetail.tasks.push({ id: "web", label: taskLabel, status: "running" });
    } else {
      setTaskStatus(progressDetail, "web", { status: "running", label: taskLabel });
    }
    progressDetail.summary = webOptions?.summary ?? taskLabel;
    await report(progressDetail.summary, progressDetail);

    // Checked outside the try below so a cancellation propagates to the caller
    // instead of being reported as a web-search failure.
    await assertJobNotCancelled(jobId);

    try {
      const tagMeta = new Map(
        (researchMap?.suggestedTags ?? []).map((t) => [
          t.tag,
          { searchIntent: t.searchIntent, searchType: t.searchType },
        ])
      );
      const accountWebTargets = [...accountTargets.values()];

      const [keywordWebPosts, accountWebPosts] = await Promise.all([
        discoverPostsViaWebSearch(webSearchTags, {
          perQueryLimit: placementMode ? 8 : 10,
          placementMode,
          concurrency: 2,
          tagMeta,
          contentBand: contentBand ?? undefined,
          maxQueries: webOptions?.maxQueries,
          targetPosts: webOptions?.targetPosts,
          braveQueryBudget: webOptions?.braveQueryBudget,
          // Honor the caller's priority; fall back to the mode-derived default.
          keywordPriority: webOptions?.keywordPriority ?? braveKeywordPriority,
          onProgress: async (msg) => {
            progressDetail.summary = msg;
            await report(msg, progressDetail);
          },
        }),
        !placementMode && accountWebTargets.length > 0
          ? discoverPostsFromSimilarAccounts(accountWebTargets.slice(0, 8), {
              perAccountLimit: placementMode ? 12 : 10,
              placementMode,
            })
          : Promise.resolve([]),
      ]);

      webPosts = [...accountWebPosts, ...keywordWebPosts];
      webFoundTotal += webPosts.length;

      const beforeMerge = ranked.length;
      ranked = mergeScanPosts(ranked, webPosts, MAX_MERGED_POSTS);
      const added = ranked.length - beforeMerge;

      setTaskStatus(progressDetail, "web", { status: "done", found: webPosts.length });
      const sourceDetail = placementMode
        ? `關鍵字 ${keywordWebPosts.length} 篇`
        : `相似帳號 ${accountWebPosts.length}、關鍵字 ${keywordWebPosts.length}`;
      progressDetail.summary =
        webPosts.length > 0
          ? `${taskLabel}完成:${webPosts.length} 篇(${sourceDetail}),合併後共 ${ranked.length} 篇(新增 ${added} 篇)`
          : `${taskLabel}:無額外結果,保留 ${ranked.length} 篇`;
      await report(progressDetail.summary, progressDetail);
      return webPosts.length;
    } catch (error) {
      setTaskStatus(progressDetail, "web", {
        status: "failed",
        error: error instanceof Error ? error.message : "網路搜尋失敗",
      });
      await report(
        `${taskLabel}失敗:${error instanceof Error ? error.message : "未知錯誤"},沿用既有 ${ranked.length} 篇`,
        progressDetail
      );
      return 0;
    }
  }

  /**
   * Searches through the official API. With tasks it runs them concurrently and
   * merges the results into `ranked`; without tasks it runs a single search for
   * the topic query and replaces `ranked`.
   *
   * @returns Whether the API produced at least one post.
   */
  async function runApiKeywordSearch(
    credentials: NonNullable<typeof apiCredentials>,
    tasks: typeof keywordTasks
  ): Promise<boolean> {
    if (tasks.length > 0) {
      if (!placementMode) {
        scanMode = "multi-tag";
        scanTags = allTasks.map((t) => t.label);
      }
      const concurrency = resolveScanConcurrency(tasks.length);
      progressDetail = {
        summary: `API 海巡 0/${tasks.length}(${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);

      const batches = await runWithConcurrency(
        tasks,
        async (task) => {
          setTaskStatus(progressDetail, task.id, { status: "running" });
          await report(`API 海巡:${task.label}`, progressDetail);
          try {
            const found = await keywordSearchViaThreadsApi(credentials, {
              query: task.query,
              limit: task.limit,
              searchType: placementMode ? "RECENT" : "TOP",
            });
            setTaskStatus(progressDetail, task.id, { status: "done", found: found.length });
            return found.map((post) =>
              tagPostSource({ ...post, searchTag: task.label }, "keyword")
            );
          } catch (error) {
            // A failing task is recorded on its progress entry and contributes no posts.
            setTaskStatus(progressDetail, task.id, {
              status: "failed",
              error: error instanceof Error ? error.message : "API 海巡失敗",
            });
            return [];
          }
        },
        {
          concurrency,
          shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
          onProgress: async (done, total) => {
            progressDetail.summary = `API 海巡 ${done}/${total}`;
            await report(progressDetail.summary, progressDetail);
          },
        }
      );

      // Flatten once and reuse for both the merge and the "found anything" result.
      const apiPosts = batches.flat();
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(apiPosts, placementMode),
        MAX_MERGED_POSTS
      );
      return apiPosts.length > 0;
    }

    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `API 搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [{ id: "single", label: topicQuery, status: "running" }],
    };
    await report(progressDetail.summary, progressDetail);

    const posts = await keywordSearchViaThreadsApi(credentials, {
      query: topicQuery,
      limit: 20,
      searchType: placementMode ? "RECENT" : "TOP",
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length} 篇`;
    await report(progressDetail.summary, progressDetail);
    return ranked.length > 0;
  }

  /** Records a browser crawl step on a task, logs it and reports it as the summary. */
  async function reportBrowserStep(
    taskId: string,
    step: BrowserCrawlStep,
    detail?: string,
    summaryPrefix?: string
  ) {
    const stepLabel = BROWSER_STEP_LABELS[step] ?? step;
    setTaskStatus(progressDetail, taskId, {
      step: stepLabel,
      stepDetail: detail,
    });
    progressDetail.summary = summaryPrefix
      ? `${summaryPrefix} · ${stepLabel}${detail ? `(${detail})` : ""}`
      : `${stepLabel}${detail ? `(${detail})` : ""}`;
    console.log(`[scan-crawler] task=${taskId} ${progressDetail.summary}`);
    await report(progressDetail.summary, progressDetail);
  }

  /**
   * Runs the given scan tasks through the browser crawler, keeping per-task
   * progress up to date, and tags each post as coming from an "account" or a
   * "keyword" task.
   */
  async function runBrowserTaskSearch(
    activeSession: NonNullable<typeof session>,
    tasks: typeof allTasks,
    label: string
  ): Promise<RankedPostWithTag[]> {
    if (tasks.length === 0) return [];
    const concurrency = resolveScanConcurrency(tasks.length);

    if (!progressDetail.tasks || progressDetail.tasks.length === 0) {
      progressDetail = {
        summary: `${label} 0/${tasks.length}(${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
    } else {
      // Progress entries already exist (e.g. from an earlier phase): append the missing ones.
      for (const task of tasks) {
        if (!progressDetail.tasks!.some((t) => t.id === task.id)) {
          progressDetail.tasks!.push({ id: task.id, label: task.label, status: "pending" });
        }
      }
    }

    const posts = await executeScanTasks(activeSession.storageState, tasks, {
      session: activeSession,
      concurrency,
      shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
      onTaskStart: async (task) => {
        setTaskStatus(progressDetail, task.id, {
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        });
        const running = progressDetail.tasks?.filter((t) => t.status === "running").length ?? 0;
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}(${running} 進行中)· ${task.label}`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskStep: async (task, step, detail) => {
        await reportBrowserStep(
          task.id,
          step,
          detail,
          `${label} ${task.label}`
        );
      },
      onTaskDone: async (task, found) => {
        setTaskStatus(progressDetail, task.id, {
          status: "done",
          found,
          step: undefined,
          stepDetail: undefined,
        });
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}:${task.label}(${found} 篇)`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskFail: async (task, error) => {
        setTaskStatus(progressDetail, task.id, {
          status: "failed",
          error: error instanceof Error ? error.message : "失敗",
          step: undefined,
        });
        await report(progressDetail.summary, progressDetail);
      },
      onProgress: async (done, total, taskLabel) => {
        progressDetail.summary = `${label} ${done}/${total}:${taskLabel}`;
        await report(progressDetail.summary, progressDetail);
      },
    });

    return posts.map((post) =>
      tagPostSource(
        post,
        tasks.find((t) => t.label === post.searchTag)?.kind === "account" ? "account" : "keyword"
      )
    );
  }

  let usedApi = false;
  let usedBrowser = false;

  // Brave-only modes: web discovery is the first source.
  if (allowBrave && !allowThreads && isBraveSearchConfigured()) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `以 Brave Search 海巡(${searchSourceModeLabel(sourceMode)})…`,
      maxQueries: placementMode ? PLACEMENT_WEB_SEARCH_MAX_QUERIES : 12,
      targetPosts: placementMode ? PLACEMENT_WEB_SEARCH_TARGET_POSTS : 25,
      keywordPriority: braveKeywordPriority,
    });
  }

  if (apiCredentials) {
    try {
      if (keywordTasks.length > 0) {
        usedApi = await runApiKeywordSearch(apiCredentials, keywordTasks);
      } else if (!placementMode) {
        usedApi = await runApiKeywordSearch(apiCredentials, []);
      }
      if (!usedApi && keywordTasks.length > 0) {
        await report(
          isBraveSearchConfigured()
            ? "官方 API 關鍵字海巡沒有結果,改用 Brave Search…"
            : "官方 API 關鍵字海巡沒有結果,改用瀏覽器…"
        );
      }
    } catch (error) {
      usedApi = false;
      await report(
        `官方 API 海巡失敗:${error instanceof Error ? error.message : "未知錯誤"}`
      );
    }
  }

  const supplementThreshold = placementMode ? PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS : 8;

  // Placement mode with both Threads and Brave enabled: top up thin API results via Brave.
  if (
    allowBrave &&
    allowThreads &&
    placementMode &&
    keywordTasks.length > 0 &&
    ranked.length < supplementThreshold &&
    isBraveSearchConfigured()
  ) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `Threads API 結果不足(${ranked.length} 篇),以 Brave Search 補充…`,
      maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
      targetPosts: PLACEMENT_WEB_SEARCH_TARGET_POSTS,
      keywordPriority: "high",
    });
  }

  const needBrowserKeywords =
    allowCrawler && keywordTasks.length > 0 && ranked.length < supplementThreshold;
  const needBrowserAccounts =
    allowCrawler && !placementMode && accountTasks.length > 0 && connection.devMode;

  if ((needBrowserKeywords || needBrowserAccounts) && allowCrawler && connection.devMode) {
    session = await ensureActiveSession();
    if (placementMode) {
      scanMode = "placement-auto";
      scanTags = keywordTasks.map((t) => t.label);
    } else if (useMultiTag) {
      scanMode = "multi-tag";
      scanTags = allTasks.map((t) => t.label);
    }
    if (needBrowserKeywords) {
      const keywordPosts = await runBrowserTaskSearch(session, keywordTasks, "關鍵字海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(keywordPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = keywordPosts.length > 0 || usedBrowser;
    }
    if (needBrowserAccounts) {
      const accountPosts = await runBrowserTaskSearch(session, accountTasks, "帳號海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(accountPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = accountPosts.length > 0 || usedBrowser;
    }
  } else if (needBrowserKeywords) {
    // Keyword results are still short but the crawler cannot run: explain why.
    if (sourceMode === "threads" || sourceMode === "threads_brave") {
      throw new Error(
        `海巡來源為「${searchSourceModeLabel(sourceMode)}」,未啟用瀏覽器補漏。請到連線設定改為混合模式或開啟爬蟲,並確認 Threads API 有足夠結果。`
      );
    }
    if (!connection.searchViaApi && !allowCrawler) {
      throw new Error(
        "未開啟任何海巡方式:請到「連線設定」選擇搜尋來源,並確認 Chrome 同步或官方 API 已就緒。"
      );
    }
    if (connection.searchViaApi && !apiCredentials) {
      throw new Error(
        "已開啟官方 API 海巡,但此帳號尚未連線官方 API。請到連線設定頁綁定 OAuth,或改為 Chrome 同步。"
      );
    }
    throw new Error(
      `目前來源「${searchSourceModeLabel(sourceMode)}」沒有可用結果。請調整連線設定或確認 threads_keyword_search 權限已通過 Meta 審核。`
    );
  } else if (!useMultiTag && !usedApi && allowCrawler && connection.devMode) {
    session = await ensureActiveSession();
    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `瀏覽器搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [
        {
          id: "single",
          label: topicQuery,
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        },
      ],
    };
    await report(progressDetail.summary, progressDetail);

    const posts = await search(session.storageState, topicQuery, 20, session, {
      onStep: async (step, detail) => {
        await reportBrowserStep("single", step, detail, `瀏覽器搜尋「${topicQuery}」`);
      },
    });
    // NOTE(review): this replaces `ranked` rather than merging, so posts gathered
    // earlier (e.g. by a Brave pass) are dropped here — confirm this is intended.
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length} 篇`;
    await report(progressDetail.summary, progressDetail);
    usedBrowser = ranked.length > 0;
  }

  if (needBrowserForReplies && !session) {
    // Posts were already found, but reading replies under other people's posts needs a browser.
    try {
      session = await ensureActiveSession();
    } catch {
      session = null;
      await report("沒有可用的瀏覽器登入,這次只用官方 API 結果,略過留言抓取。");
    }
  }

  await assertJobNotCancelled(jobId);

  if (placementMode) {
    scanMode = "placement-auto";
    scanTags = placementSearchQueries;

    const filtered = applyPlacementPostFilters(ranked, contentBand);
    ranked = filtered.posts;
    if (filtered.removedStale > 0) {
      await report(
        `排除 ${filtered.removedStale} 篇超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天的舊貼文,保留近期可置入的貼文`
      );
    }
    if (filtered.removedCasual > 0) {
      await report(
        `排除 ${filtered.removedCasual} 篇閒聊/無求助訊號的貼文,保留有痛點或求推薦的內容`
      );
    }
    if (filtered.removedBand > 0) {
      await report(
        `排除 ${filtered.removedBand} 篇不在內容區間內的貼文(須呼應受眾問題或內容支柱,且未觸及排除項)`
      );
    }

    if (
      allowBrave &&
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      isBraveSearchConfigured()
    ) {
      // Too few posts survived the filters: top up via web discovery, then filter again.
      const need = PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS - ranked.length;
      await runWebDiscover("Brave 網路搜尋(篩選後補充)", {
        summary: `篩選後僅 ${ranked.length} 篇,再補充網搜(最多 ${PLACEMENT_WEB_SEARCH_MAX_QUERIES} 次)…`,
        maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
        targetPosts: Math.min(PLACEMENT_WEB_SEARCH_TARGET_POSTS, need + 8),
        keywordPriority: braveKeywordPriority,
      });
      const refiltered = applyPlacementPostFilters(ranked, contentBand);
      ranked = refiltered.posts;
      if (refiltered.removedStale + refiltered.removedCasual + refiltered.removedBand > 0) {
        await report("網搜補充後已重新套用置入篩選");
      }
    } else if (
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      !isBraveSearchConfigured()
    ) {
      await report(
        `篩選後僅 ${ranked.length} 篇;未設定 BRAVE_SEARCH_API_KEY,略過網搜補充`
      );
    }
  }

  if (ranked.length === 0) {
    throw new Error(
      placementMode
        ? `海巡未找到 ${PLACEMENT_MAX_POST_AGE_DAYS} 天內的適合貼文。建議:① 重新 AI 分析,加強「受眾會問什麼」與「內容支柱」② 到微調面板調整研究地圖 ③ 確認連線設定已開啟 Threads 搜尋`
        : "海巡未找到任何貼文。建議:① 重新 AI 分析取得短詞標籤 ② 勾選 @帳號 ③ 到微調面板補充相似帳號"
    );
  }

  // Posts are attributed to the primary sources only when the API or the browser
  // actually produced some; otherwise everything in `ranked` came from web discovery.
  // (Reading `ranked.length` unconditionally here could never be 0 after the check
  // above, which made the "web" label unreachable.)
  const primaryCount = usedApi || usedBrowser ? ranked.length : 0;

  progressDetail.phase = "save";
  progressDetail.summary = `找到 ${ranked.length} 篇,寫入資料庫…`;
  if (!progressDetail.tasks) progressDetail.tasks = [];
  progressDetail.tasks.push({ id: "save", label: "寫入資料庫", status: "running" });
  await report(progressDetail.summary, progressDetail);

  const scan = await prisma.scan.create({
    data: {
      accountId: topic.accountId,
      topicId: topic.id,
      scanMode,
      scanGoal: topic.topicGoal,
      scanTags: JSON.stringify(scanTags),
      searchSource: resolveSearchSource(primaryCount, webFoundTotal, usedApi, usedBrowser),
    },
  });

  // Items are written one at a time with a cancellation check before each write.
  for (const item of ranked) {
    await assertJobNotCancelled(jobId);
    await prisma.scanItem.create({
      data: {
        scanId: scan.id,
        externalId: item.externalId,
        text: item.text,
        permalink: item.permalink,
        authorName: item.authorName,
        postedAt: item.postedAt,
        likeCount: item.likeCount,
        replyCount: item.replyCount,
        score: item.score,
        searchTag: item.searchTag,
      },
    });
  }
  setTaskStatus(progressDetail, "save", { status: "done", found: ranked.length });

  // Fetch replies first (when a browser session exists) so users can review them and
  // later post/reply generation can reference them.
  // Without a browser, `session` is null: the step is skipped and the scan still completes.
  let repliesCount = 0;
  progressDetail.phase = "replies";
  progressDetail.tasks!.push({
    id: "replies",
    label: "抓取留言",
    status: "running",
    startedAt: Date.now(),
    step: "準備中",
  });

  if (needBrowserForReplies && session) {
    const saved = await prisma.scan.findUnique({
      where: { id: scan.id },
      include: { items: true },
    });
    const items = saved?.items ?? [];
    // Highest-scoring items that have a permalink, limited to REPLY_FETCH_TOP_N.
    const replyTargets = [...items]
      .sort((a, b) => b.score - a.score)
      .filter((item) => item.permalink)
      .slice(0, REPLY_FETCH_TOP_N);

    if (replyTargets.length > 0) {
      progressDetail.summary = `抓取 Top ${replyTargets.length} 篇貼文留言…`;
      setTaskStatus(progressDetail, "replies", {
        status: "running",
        label: `抓取留言(${replyTargets.length} 篇貼文)`,
      });
      await report(progressDetail.summary, progressDetail);
      await assertJobNotCancelled(jobId);

      const permalinks = replyTargets.map((i) => i.permalink!);
      const repliesByUrl = await getRepliesParallel(
        session.storageState,
        permalinks,
        Math.min(connection.repliesPerPost, 5),
        {
          concurrency: getReplyFetchConcurrency(),
          session,
          onProgress: async (done, total, permalink) => {
            const short = permalink.split("/").slice(-2).join("/") || permalink;
            progressDetail.summary = `抓取留言 ${done}/${total}:${short}`;
            setTaskStatus(progressDetail, "replies", {
              status: "running",
              step: `讀取貼文留言 ${done}/${total}`,
              stepDetail: short,
              // Keep the original start time of the replies task.
              startedAt:
                progressDetail.tasks?.find((t) => t.id === "replies")?.startedAt ?? Date.now(),
            });
            await report(progressDetail.summary, progressDetail);
          },
        }
      );

      for (const item of replyTargets) {
        await assertJobNotCancelled(jobId);
        const replies = repliesByUrl.get(item.permalink!) ?? [];
        for (const reply of replies) {
          await prisma.reply.create({
            data: {
              scanItemId: item.id,
              text: reply.text,
              authorName: reply.authorName,
              likeCount: reply.likeCount,
              postedAt: reply.postedAt,
            },
          });
          repliesCount += 1;
        }
      }

      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言",
        found: repliesCount,
      });
      progressDetail.summary = `已抓 ${repliesCount} 則留言`;
      await report(progressDetail.summary, progressDetail);
    } else {
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言(無可抓貼文)",
        found: 0,
      });
      progressDetail.summary = "無可抓留言的貼文";
      await report(progressDetail.summary, progressDetail);
    }
  } else {
    // Reply fetching was skipped; the label records which precondition was missing.
    const skipLabel =
      connection.scrapeReplies && !connection.devMode
        ? "抓取留言(需 Dev 模式)"
        : needBrowserForReplies && !session
          ? "抓取留言(無瀏覽器登入)"
          : "抓取留言(已關閉)";
    setTaskStatus(progressDetail, "replies", {
      status: "done",
      label: skipLabel,
      found: 0,
    });
    progressDetail.summary = skipLabel;
    await report(progressDetail.summary, progressDetail);
  }

  await prisma.scan.update({
    where: { id: scan.id },
    data: { repliesFetched: repliesCount > 0, repliesCount },
  });

  const qualityLabel = "整理結果";
  progressDetail.phase = "quality";
  progressDetail.summary = `${qualityLabel}中…`;
  progressDetail.tasks!.push({ id: "quality", label: qualityLabel, status: "running" });
  await report(progressDetail.summary, progressDetail);
  await assertJobNotCancelled(jobId);

  await applyQualityFilter(scan.id);
  const visibleCount = await prisma.scanItem.count({
    where: { scanId: scan.id, qualityTier: { not: "EXCLUDE" } },
  });
  setTaskStatus(progressDetail, "quality", { status: "done", found: visibleCount });
  progressDetail.summary = `完成 · ${ranked.length} 篇 · ${repliesCount} 則留言`;
  await report(progressDetail.summary, progressDetail);

  return prisma.scan.findUnique({
    where: { id: scan.id },
    include: {
      topic: true,
      items: {
        orderBy: [{ combinedScore: "desc" }, { score: "desc" }],
        include: { replies: { orderBy: { likeCount: "desc" } } },
      },
    },
  });
}

/**
 * Resets the quality fields of every item of a scan. In placement mode, items
 * with a known `postedAt` that is no longer fresh are marked `EXCLUDE` with a
 * combined score of 0; every other item gets its raw `score` as `combinedScore`.
 *
 * @param scanId - Id of the scan whose items are updated.
 * @returns Always an empty array.
 * @throws Error when the scan does not exist.
 */
export async function applyQualityFilter(scanId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: { topic: true, items: true },
  });
  if (!scan) throw new Error("找不到海巡紀錄");

  // The goal recorded on the scan wins; fall back to the topic's current goal.
  const placementMode = isPlacementGoal(scan.scanGoal ?? scan.topic.topicGoal);

  for (const item of scan.items) {
    const isStale = placementMode && item.postedAt && !isPostFreshEnough(item.postedAt);
    if (isStale) {
      await prisma.scanItem.update({
        where: { id: item.id },
        data: {
          relevanceScore: null,
          placementScore: null,
          qualityTier: "EXCLUDE",
          qualityReason: `貼文已超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天,不適合留言置入`,
          placementReason: `貼文已超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天,不適合留言置入`,
          combinedScore: 0,
        },
      });
      continue;
    }
    await prisma.scanItem.update({
      where: { id: item.id },
      data: {
        relevanceScore: null,
        placementScore: null,
        qualityTier: null,
        qualityReason: null,
        placementReason: null,
        combinedScore: item.score,
      },
    });
  }
  return [];
}

/**
 * Scans every active topic (optionally limited to one account) sequentially,
 * pausing via `humanDelay(2000, 4000)` after each topic.
 *
 * @param accountId - When truthy, only topics of this account are scanned.
 * @returns The scan results in topic order.
 */
export async function runScanForAllActiveTopics(accountId?: string | null) {
  const topics = await prisma.topic.findMany({
    where: { active: true, ...(accountId ? { accountId } : {}) },
  });
  const results = [];
  for (const topic of topics) {
    const scan = await runScanForTopic(topic.id);
    results.push(scan);
    await humanDelay(2000, 4000);
  }
  return results;
}