haixunMaster/lib/services/scan.ts

1001 lines
36 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { prisma } from "@/lib/db";
import { getActiveAccountConnectionSettings } from "@/lib/account-connection-settings";
import { isPlacementGoal } from "@/lib/types/topic-goal";
import { assertJobNotCancelled, isJobCancelled } from "@/lib/jobs/cancel";
import { initTaskProgress, setTaskStatus } from "@/lib/jobs/progress";
import { BROWSER_STEP_LABELS, type BrowserCrawlStep } from "@/lib/threads-browser/progress";
import { updateJobProgress } from "@/lib/jobs/progress-server";
import type { JobProgressDetail } from "@/lib/jobs/types";
import { ensureActiveSession } from "@/lib/threads-browser";
import { executeScanTasks, search } from "@/lib/threads-browser/search";
import { getReplyFetchConcurrency } from "@/lib/threads-browser/human-behavior";
import { getRepliesParallel } from "@/lib/threads-browser/replies";
import { keywordSearchViaThreadsApi } from "@/lib/threads-api";
import { getActiveThreadsCredentials } from "@/lib/services/threads-credentials";
import { computePlacementScore, type RankedPost } from "@/lib/ranking";
import { parseResearchMap, parseSelectedTags, threadsProfileUrl, type SimilarAccount } from "@/lib/types/research";
import { humanDelay } from "@/lib/utils";
import { runWithConcurrency } from "@/lib/utils/concurrency";
import {
contentBandFromResearchMap,
isInContentBand,
} from "@/lib/research-content-band";
import { hasPlacementIntent, looksLikeCasualChat } from "@/lib/topic-anchor";
import {
isPostFreshEnough,
PLACEMENT_MAX_POST_AGE_DAYS,
PLACEMENT_WEB_SEARCH_MAX_QUERIES,
PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS,
PLACEMENT_WEB_SEARCH_TARGET_POSTS,
} from "@/lib/scan-recency";
import { isBraveSearchConfigured } from "@/lib/services/web-search";
import {
modeAllowsBrave,
modeAllowsCrawler,
modeAllowsThreads,
searchSourceModeLabel,
} from "@/lib/search/source-mode";
import {
buildPlacementScanTasks,
buildScanTasks,
getSelectedAccountUsernames,
hasPlacementSearchSources,
hasSelectedAccountTags,
resolvePlacementSearchQueries,
resolveScanConcurrency,
splitScanTasks,
} from "./scan-tasks";
import {
discoverPostsFromSimilarAccounts,
discoverPostsViaWebSearch,
mergeScanPosts,
tagPostSource,
} from "./scan-web-discover";
/** A ranked post, optionally annotated with the search tag (task label or query) that surfaced it. */
type RankedPostWithTag = RankedPost & { searchTag?: string };
/** How many of the top-scoring scan items (with a permalink) get their replies fetched. */
const REPLY_FETCH_TOP_N = 4;
/** Limit handed to mergeScanPosts whenever new posts are merged into the running result list. */
const MAX_MERGED_POSTS = 90;
/**
 * Re-scores posts with the placement scorer when placement mode is active.
 *
 * @param posts - Posts to (possibly) re-score.
 * @param placementMode - When false the input array is returned untouched (same reference).
 * @returns A new array of copies carrying the placement score, or `posts` itself.
 */
function rescoreForPlacement(posts: RankedPostWithTag[], placementMode: boolean): RankedPostWithTag[] {
  if (placementMode) {
    const rescored: RankedPostWithTag[] = [];
    for (const entry of posts) {
      rescored.push({ ...entry, score: computePlacementScore(entry) });
    }
    return rescored;
  }
  return posts;
}
/**
 * Applies the placement-mode filters in sequence and reports how many posts each stage removed.
 *
 * Stages: freshness (by `postedAt`), placement intent without casual chat (by `text`),
 * and, only when a content band is supplied, membership in that band.
 *
 * @param posts - Candidate posts.
 * @param contentBand - Content band derived from the research map; a falsy value skips the band stage.
 * @returns The surviving posts plus per-stage removal counts.
 */
function applyPlacementPostFilters(
  posts: RankedPostWithTag[],
  contentBand: ReturnType<typeof contentBandFromResearchMap>
): { posts: RankedPostWithTag[]; removedStale: number; removedCasual: number; removedBand: number } {
  // Stage 1: drop posts that are too old.
  const freshPosts = posts.filter((candidate) => isPostFreshEnough(candidate.postedAt));

  // Stage 2: keep only posts that show placement intent and do not read as casual chat.
  const intentPosts = freshPosts.filter((candidate) => {
    if (!hasPlacementIntent(candidate.text)) return false;
    return !looksLikeCasualChat(candidate.text);
  });

  // Stage 3 (optional): keep only posts inside the content band.
  const bandPosts = contentBand
    ? intentPosts.filter((candidate) => isInContentBand(candidate.text, contentBand))
    : intentPosts;

  return {
    posts: bandPosts,
    removedStale: posts.length - freshPosts.length,
    removedCasual: freshPosts.length - intentPosts.length,
    removedBand: intentPosts.length - bandPosts.length,
  };
}
/**
 * Picks the similar accounts from the research map whose usernames were selected via tags.
 *
 * @param researchMap - Parsed research map (may be null/undefined).
 * @param selectedTags - Tags chosen for this scan.
 * @returns Map keyed by lower-cased, trimmed username; empty when no account tag is selected.
 */
function collectWebAccountTargets(
  researchMap: ReturnType<typeof parseResearchMap>,
  selectedTags: string[]
): Map<string, { username: string; reason: string; postUrl?: string }> {
  const targets = new Map<string, { username: string; reason: string; postUrl?: string }>();
  if (!hasSelectedAccountTags(selectedTags)) return targets;

  // Lower-cased usernames the user asked for.
  const wanted = new Set<string>();
  for (const name of getSelectedAccountUsernames(selectedTags)) {
    wanted.add(name.toLowerCase());
  }

  const candidates = researchMap?.similarAccounts ?? [];
  candidates.forEach((candidate) => {
    const key = candidate.username.trim().toLowerCase();
    if (key.length > 0 && wanted.has(key)) {
      targets.set(key, candidate);
    }
  });
  return targets;
}
/**
 * Derives the label stored as a scan's search source.
 *
 * @param primaryCount - Number of posts attributed to the primary sources; only compared against 0.
 * @param webCount - Number of posts found through web search.
 * @param usedApi - Whether the official API contributed results.
 * @param usedBrowser - Whether the browser crawler contributed results.
 * @returns `"web"`, `"hybrid"`, `"api"` or `"browser"`.
 */
function resolveSearchSource(
  primaryCount: number,
  webCount: number,
  usedApi: boolean,
  usedBrowser: boolean
): string {
  const foundOnWeb = webCount > 0;
  if (foundOnWeb) {
    // Web results alone are "web"; web results on top of anything else are "hybrid".
    return primaryCount === 0 ? "web" : "hybrid";
  }
  if (usedApi) {
    return usedBrowser ? "hybrid" : "api";
  }
  return "browser";
}
/**
 * Callback that receives each human-readable progress message emitted during a scan.
 * It may return a promise; runScanForTopic awaits the result before continuing.
 */
export interface ScanProgressCallback {
(message: string): void | Promise<void>;
}
/**
 * Persists a progress snapshot for a job; does nothing when no job id is given.
 *
 * @param jobId - Job to update, or undefined/empty to skip.
 * @param detail - Progress detail forwarded to updateJobProgress.
 */
async function persistProgress(jobId: string | undefined, detail: JobProgressDetail) {
  if (jobId) {
    await updateJobProgress(jobId, detail);
  }
}
/**
 * Runs a full scan for one topic and persists the outcome.
 *
 * The flow, in order:
 *  1. Load the topic and the active connection settings, then build the scan
 *     tasks (placement tasks, multi-tag tasks, or none for a single query).
 *  2. Collect posts: Brave web search when Threads search is not allowed, the
 *     official Threads API when credentials are available, a Brave supplement
 *     in placement mode when results are thin, and the browser crawler when
 *     it is allowed and Dev mode is on.
 *  3. In placement mode, filter the posts (freshness, intent, content band)
 *     and top up once more through Brave if too few remain.
 *  4. Save the scan and its items, optionally fetch replies for the top
 *     posts through a browser session, apply the quality filter and enrich
 *     the topic's similar accounts.
 *
 * @param topicId - Id of the topic to scan.
 * @param options - Optional overrides: `useTags: false` disables multi-tag
 *   scanning, a non-empty `selectedTags` replaces the topic's stored tags,
 *   `jobId` enables cancellation checks and persisted progress, and
 *   `onProgress` receives every progress message.
 * @returns The saved scan including its topic, items and replies.
 * @throws Error when the topic is missing, when no usable task or search
 *   source exists, or when no post is found. assertJobNotCancelled is called
 *   at several points and presumably throws when the job was cancelled.
 */
export async function runScanForTopic(
  topicId: string,
  options?: {
    useTags?: boolean;
    selectedTags?: string[];
    jobId?: string;
    onProgress?: ScanProgressCallback;
  }
) {
  const jobId = options?.jobId;
  // Sends a message to the caller's callback and, when both a detail and a job id exist,
  // persists the detail snapshot for the job.
  const report = async (msg: string, detail?: JobProgressDetail) => {
    await options?.onProgress?.(msg);
    if (detail && jobId) {
      await persistProgress(jobId, detail);
    }
  };

  await assertJobNotCancelled(jobId);
  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) throw new Error("找不到主題");
  const topicQuery: string = topic.query;
  const placementMode = isPlacementGoal(topic.topicGoal);
  const connection = await getActiveAccountConnectionSettings();
  const sourceMode = connection.searchSourceMode;
  const allowThreads = modeAllowsThreads(sourceMode);
  const allowBrave = modeAllowsBrave(sourceMode);
  const allowCrawler = modeAllowsCrawler(sourceMode);
  const braveKeywordPriority =
    placementMode || sourceMode === "brave" || sourceMode === "brave_crawler"
      ? ("high" as const)
      : ("medium" as const);
  await report(`海巡搜尋來源:${searchSourceModeLabel(sourceMode)}`);

  // API first: when the account is connected to the official API, scan through it first;
  // fall back to other sources only when it throws or returns nothing.
  // A failure to load credentials is treated the same as having none.
  const apiCredentials =
    allowThreads && connection.searchViaApi
      ? await getActiveThreadsCredentials().catch(() => null)
      : null;
  // Scraping posts/replies is only allowed while Dev mode is on; otherwise only the official API is used.
  // Note: the official API cannot read replies under other people's posts, so fetching reply
  // material still needs a browser session (i.e. Dev mode).
  const needBrowserForReplies = connection.devMode && connection.scrapeReplies;

  const researchMap = parseResearchMap(topic.researchMap);
  const selectedTags =
    options?.selectedTags && options.selectedTags.length > 0
      ? options.selectedTags
      : parseSelectedTags(topic.selectedTags);
  const useMultiTag =
    !placementMode && options?.useTags !== false && selectedTags.length > 0;
  const placementSearchQueries = placementMode
    ? resolvePlacementSearchQueries(researchMap, topicQuery, selectedTags)
    : [];

  // Mutable scan state shared by the nested helpers below.
  let ranked: RankedPostWithTag[] = [];
  let scanTags: string[] = [];
  let scanMode = "single";
  let progressDetail: JobProgressDetail = { summary: "準備海巡…", phase: "tasks", tasks: [] };
  let session: Awaited<ReturnType<typeof ensureActiveSession>> | null = null;

  if (placementMode && !hasPlacementSearchSources(researchMap, topicQuery)) {
    throw new Error(
      "請先完成 AI 分析,產出「受眾會問什麼」與「內容支柱」後再海巡"
    );
  }

  const builtTasks = placementMode
    ? buildPlacementScanTasks({ researchMap, seedQuery: topicQuery, selectedTags })
    : useMultiTag
      ? buildScanTasks({
          selectedTags,
          researchMap,
          seedQuery: topic.query,
          topicGoal: topic.topicGoal,
          topicLabel: topic.label,
        })
      : [];
  const allTasks = builtTasks;
  const { keywordTasks, accountTasks } = splitScanTasks(allTasks);
  const contentBand = placementMode ? contentBandFromResearchMap(researchMap) : null;
  if (!placementMode && useMultiTag && selectedTags.length > 0 && builtTasks.length === 0) {
    throw new Error("勾選的標籤均無法用於海巡,請調整選擇後再試");
  }

  const webSearchTags = placementMode
    ? placementSearchQueries
    : useMultiTag && selectedTags.length > 0
      ? selectedTags
      : [topicQuery];
  const accountTargets = collectWebAccountTargets(researchMap, selectedTags);
  // Total number of posts found by web search across ALL runWebDiscover calls.
  // runWebDiscover can run up to three times per scan, so a per-call value would lose
  // earlier finds when a later supplement returns nothing.
  let webPostsFound = 0;

  /**
   * Runs Brave web discovery (keyword queries plus, outside placement mode, up to four
   * selected similar accounts) and merges the finds into `ranked`.
   * Failures are reported on the "web" progress task and swallowed so the scan can
   * continue with whatever it already has. Returns the number of posts found by this call.
   */
  async function runWebDiscover(
    taskLabel: string,
    discoverOptions?: {
      summary?: string;
      maxQueries?: number;
      targetPosts?: number;
      braveQueryBudget?: number;
      keywordPriority?: "high" | "medium" | "low";
    }
  ): Promise<number> {
    // Honour the caller's priority; fall back to the scan-wide default.
    const keywordPriority = discoverOptions?.keywordPriority ?? braveKeywordPriority;
    progressDetail.phase = "web";
    if (!progressDetail.tasks) progressDetail.tasks = [];
    if (!progressDetail.tasks.some((t) => t.id === "web")) {
      progressDetail.tasks.push({ id: "web", label: taskLabel, status: "running" });
    } else {
      setTaskStatus(progressDetail, "web", { status: "running", label: taskLabel });
    }
    progressDetail.summary = discoverOptions?.summary ?? taskLabel;
    await report(progressDetail.summary, progressDetail);
    try {
      await assertJobNotCancelled(jobId);
      const tagMeta = new Map(
        (researchMap?.suggestedTags ?? []).map((t) => [
          t.tag,
          { searchIntent: t.searchIntent, searchType: t.searchType },
        ])
      );
      const accountWebTargets = [...accountTargets.values()];
      const [keywordWebPosts, accountWebPosts] = await Promise.all([
        discoverPostsViaWebSearch(webSearchTags, {
          perQueryLimit: 15,
          placementMode,
          concurrency: 2,
          tagMeta,
          contentBand: contentBand ?? undefined,
          maxQueries: discoverOptions?.maxQueries,
          targetPosts: discoverOptions?.targetPosts,
          braveQueryBudget: discoverOptions?.braveQueryBudget,
          keywordPriority,
          onProgress: async (msg) => {
            progressDetail.summary = msg;
            await report(msg, progressDetail);
          },
        }),
        !placementMode && accountWebTargets.length > 0
          ? discoverPostsFromSimilarAccounts(accountWebTargets.slice(0, 4), {
              perAccountLimit: 20,
              placementMode,
              keywordPriority,
            })
          : Promise.resolve([]),
      ]);
      const webPosts: Awaited<ReturnType<typeof discoverPostsViaWebSearch>> = [
        ...accountWebPosts,
        ...keywordWebPosts,
      ];
      webPostsFound += webPosts.length;
      const beforeMerge = ranked.length;
      ranked = mergeScanPosts(ranked, webPosts, MAX_MERGED_POSTS);
      const added = ranked.length - beforeMerge;
      setTaskStatus(progressDetail, "web", { status: "done", found: webPosts.length });
      const sourceDetail = placementMode
        ? `關鍵字 ${keywordWebPosts.length}`
        : `相似帳號 ${accountWebPosts.length}、關鍵字 ${keywordWebPosts.length}`;
      progressDetail.summary =
        webPosts.length > 0
          ? `${taskLabel}完成:${webPosts.length} 篇(${sourceDetail}),合併後共 ${ranked.length} 篇(新增 ${added} 篇)`
          : `${taskLabel}:無額外結果,保留 ${ranked.length}`;
      await report(progressDetail.summary, progressDetail);
      return webPosts.length;
    } catch (error) {
      setTaskStatus(progressDetail, "web", {
        status: "failed",
        error: error instanceof Error ? error.message : "網路搜尋失敗",
      });
      await report(
        `${taskLabel}失敗:${error instanceof Error ? error.message : "未知錯誤"},沿用既有 ${ranked.length}`,
        progressDetail
      );
      return 0;
    }
  }

  /**
   * Searches through the official Threads API.
   * With tasks: runs them with bounded concurrency and merges the finds into `ranked`;
   * a failing task is marked failed and contributes nothing.
   * Without tasks: runs one search for the topic query and REPLACES `ranked`.
   * Returns whether any post was found.
   */
  async function runApiKeywordSearch(
    credentials: NonNullable<typeof apiCredentials>,
    tasks: typeof keywordTasks
  ): Promise<boolean> {
    if (tasks.length > 0) {
      if (!placementMode) {
        scanMode = "multi-tag";
        scanTags = allTasks.map((t) => t.label);
      }
      const concurrency = resolveScanConcurrency(tasks.length);
      progressDetail = {
        summary: `API 海巡 0/${tasks.length}(${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
      const batches = await runWithConcurrency(
        tasks,
        async (task) => {
          setTaskStatus(progressDetail, task.id, { status: "running" });
          await report(`API 海巡:${task.label}`, progressDetail);
          try {
            const found = await keywordSearchViaThreadsApi(credentials, {
              query: task.query,
              limit: task.limit,
              searchType: placementMode ? "RECENT" : "TOP",
            });
            setTaskStatus(progressDetail, task.id, { status: "done", found: found.length });
            return found.map((post) =>
              tagPostSource({ ...post, searchTag: task.label }, "keyword")
            );
          } catch (error) {
            setTaskStatus(progressDetail, task.id, {
              status: "failed",
              error: error instanceof Error ? error.message : "API 海巡失敗",
            });
            return [];
          }
        },
        {
          concurrency,
          shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
          onProgress: async (done, total) => {
            progressDetail.summary = `API 海巡 ${done}/${total}`;
            await report(progressDetail.summary, progressDetail);
          },
        }
      );
      const apiPosts = batches.flat();
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(apiPosts, placementMode),
        MAX_MERGED_POSTS
      );
      return apiPosts.length > 0;
    }

    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `API 搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [{ id: "single", label: topicQuery, status: "running" }],
    };
    await report(progressDetail.summary, progressDetail);
    const posts = await keywordSearchViaThreadsApi(credentials, {
      query: topicQuery,
      limit: 20,
      searchType: placementMode ? "RECENT" : "TOP",
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length}`;
    await report(progressDetail.summary, progressDetail);
    return ranked.length > 0;
  }

  /** Records the current browser crawl step on a task, logs it and reports it. */
  async function reportBrowserStep(
    taskId: string,
    step: BrowserCrawlStep,
    detail?: string,
    summaryPrefix?: string
  ) {
    const stepLabel = BROWSER_STEP_LABELS[step] ?? step;
    setTaskStatus(progressDetail, taskId, {
      step: stepLabel,
      stepDetail: detail,
    });
    progressDetail.summary = summaryPrefix
      ? `${summaryPrefix} · ${stepLabel}${detail ? `${detail}` : ""}`
      : `${stepLabel}${detail ? `${detail}` : ""}`;
    console.log(`[scan-crawler] task=${taskId} ${progressDetail.summary}`);
    await report(progressDetail.summary, progressDetail);
  }

  /**
   * Runs the given tasks through the browser crawler and returns the found posts, each
   * tagged as "account" or "keyword" according to the kind of the task whose label
   * matches the post's searchTag. Does not touch `ranked`; the caller merges.
   */
  async function runBrowserTaskSearch(
    activeSession: NonNullable<typeof session>,
    tasks: typeof allTasks,
    label: string
  ): Promise<RankedPostWithTag[]> {
    if (tasks.length === 0) return [];
    const concurrency = resolveScanConcurrency(tasks.length);
    if (!progressDetail.tasks || progressDetail.tasks.length === 0) {
      progressDetail = {
        summary: `${label} 0/${tasks.length}(${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
    } else {
      // Keep the progress entries of earlier phases and append the tasks not listed yet.
      for (const task of tasks) {
        if (!progressDetail.tasks!.some((t) => t.id === task.id)) {
          progressDetail.tasks!.push({ id: task.id, label: task.label, status: "pending" });
        }
      }
    }
    const posts = await executeScanTasks(activeSession.storageState, tasks, {
      session: activeSession,
      concurrency,
      shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
      onTaskStart: async (task) => {
        setTaskStatus(progressDetail, task.id, {
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        });
        const running = progressDetail.tasks?.filter((t) => t.status === "running").length ?? 0;
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}(${running} 進行中)· ${task.label}`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskStep: async (task, step, detail) => {
        await reportBrowserStep(
          task.id,
          step,
          detail,
          `${label} ${task.label}`
        );
      },
      onTaskDone: async (task, found) => {
        setTaskStatus(progressDetail, task.id, {
          status: "done",
          found,
          step: undefined,
          stepDetail: undefined,
        });
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}${task.label}(${found} 篇)`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskFail: async (task, error) => {
        setTaskStatus(progressDetail, task.id, {
          status: "failed",
          error: error instanceof Error ? error.message : "失敗",
          step: undefined,
        });
        await report(progressDetail.summary, progressDetail);
      },
      onProgress: async (done, total, taskLabel) => {
        progressDetail.summary = `${label} ${done}/${total}${taskLabel}`;
        await report(progressDetail.summary, progressDetail);
      },
    });
    return posts.map((post) =>
      tagPostSource(
        post,
        tasks.find((t) => t.label === post.searchTag)?.kind === "account" ? "account" : "keyword"
      )
    );
  }

  // True only when the respective source actually returned at least one post.
  let usedApi = false;
  let usedBrowser = false;

  // Source 1: Brave is the main source when Threads search is not allowed.
  if (allowBrave && !allowThreads && isBraveSearchConfigured()) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `以 Brave Search 海巡(${searchSourceModeLabel(sourceMode)})…`,
      maxQueries: placementMode ? PLACEMENT_WEB_SEARCH_MAX_QUERIES : 12,
      targetPosts: placementMode ? PLACEMENT_WEB_SEARCH_TARGET_POSTS : 25,
      keywordPriority: braveKeywordPriority,
    });
  }

  // Source 2: official Threads API. Any error here only disables the API result;
  // the scan goes on with the remaining sources.
  if (apiCredentials) {
    try {
      if (keywordTasks.length > 0) {
        usedApi = await runApiKeywordSearch(apiCredentials, keywordTasks);
      } else if (!placementMode) {
        usedApi = await runApiKeywordSearch(apiCredentials, []);
      }
      if (!usedApi && keywordTasks.length > 0) {
        await report(
          isBraveSearchConfigured()
            ? "官方 API 關鍵字海巡沒有結果,改用 Brave Search…"
            : "官方 API 關鍵字海巡沒有結果,改用瀏覽器…"
        );
      }
    } catch (error) {
      usedApi = false;
      await report(
        `官方 API 海巡失敗:${error instanceof Error ? error.message : "未知錯誤"}`
      );
    }
  }

  // Source 3: in placement mode, supplement thin Threads results with Brave.
  const supplementThreshold = placementMode ? PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS : 8;
  if (
    allowBrave &&
    allowThreads &&
    placementMode &&
    keywordTasks.length > 0 &&
    ranked.length < supplementThreshold &&
    isBraveSearchConfigured()
  ) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `Threads API 結果不足(${ranked.length} 篇),以 Brave Search 補充…`,
      maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
      targetPosts: PLACEMENT_WEB_SEARCH_TARGET_POSTS,
      keywordPriority: "high",
    });
  }

  // Source 4: browser crawler, for keyword tasks while results are still thin and for
  // account tasks outside placement mode. Requires the crawler to be allowed and Dev mode on.
  const needBrowserKeywords =
    allowCrawler && keywordTasks.length > 0 && ranked.length < supplementThreshold;
  const needBrowserAccounts =
    allowCrawler && !placementMode && accountTasks.length > 0 && connection.devMode;
  if ((needBrowserKeywords || needBrowserAccounts) && allowCrawler && connection.devMode) {
    session = await ensureActiveSession();
    if (placementMode) {
      scanMode = "placement-auto";
      scanTags = keywordTasks.map((t) => t.label);
    } else if (useMultiTag) {
      scanMode = "multi-tag";
      scanTags = allTasks.map((t) => t.label);
    }
    if (needBrowserKeywords) {
      const keywordPosts = await runBrowserTaskSearch(session, keywordTasks, "關鍵字海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(keywordPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = keywordPosts.length > 0 || usedBrowser;
    }
    if (needBrowserAccounts) {
      const accountPosts = await runBrowserTaskSearch(session, accountTasks, "帳號海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(accountPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = accountPosts.length > 0 || usedBrowser;
    }
  } else if (needBrowserKeywords) {
    // Keyword results are still thin but the crawler cannot run: explain why.
    if (sourceMode === "threads" || sourceMode === "threads_brave") {
      throw new Error(
        `海巡來源為「${searchSourceModeLabel(sourceMode)}」,未啟用瀏覽器補漏。請到連線設定改為混合模式或開啟爬蟲,並確認 Threads API 有足夠結果。`
      );
    }
    if (!connection.searchViaApi && !allowCrawler) {
      throw new Error(
        "未開啟任何海巡方式:請到「連線設定」選擇搜尋來源,並確認 Chrome 同步或官方 API 已就緒。"
      );
    }
    if (connection.searchViaApi && !apiCredentials) {
      throw new Error(
        "已開啟官方 API 海巡,但此帳號尚未連線官方 API。請到連線設定頁綁定 OAuth或改為 Chrome 同步。"
      );
    }
    throw new Error(
      `目前來源「${searchSourceModeLabel(sourceMode)}」沒有可用結果。請調整連線設定或確認 threads_keyword_search 權限已通過 Meta 審核。`
    );
  } else if (!useMultiTag && !usedApi && allowCrawler && connection.devMode) {
    // Single-query browser search; this REPLACES `ranked`.
    session = await ensureActiveSession();
    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `瀏覽器搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [
        {
          id: "single",
          label: topicQuery,
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        },
      ],
    };
    await report(progressDetail.summary, progressDetail);
    const posts = await search(session.storageState, topicQuery, 20, session, {
      onStep: async (step, detail) => {
        await reportBrowserStep("single", step, detail, `瀏覽器搜尋「${topicQuery}」`);
      },
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length}`;
    await report(progressDetail.summary, progressDetail);
    usedBrowser = ranked.length > 0;
  }

  if (needBrowserForReplies && !session) {
    // Posts came from elsewhere, but fetching replies under other people's posts needs a browser.
    // A missing login is not fatal: reply fetching is skipped later.
    try {
      session = await ensureActiveSession();
    } catch {
      session = null;
      await report("沒有可用的瀏覽器登入,這次只用官方 API 結果,略過留言抓取。");
    }
  }
  await assertJobNotCancelled(jobId);

  if (placementMode) {
    scanMode = "placement-auto";
    scanTags = placementSearchQueries;
    const filtered = applyPlacementPostFilters(ranked, contentBand);
    ranked = filtered.posts;
    if (filtered.removedStale > 0) {
      await report(
        `排除 ${filtered.removedStale} 篇超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天的舊貼文,保留近期可置入的貼文`
      );
    }
    if (filtered.removedCasual > 0) {
      await report(
        `排除 ${filtered.removedCasual} 篇閒聊/無求助訊號的貼文,保留有痛點或求推薦的內容`
      );
    }
    if (filtered.removedBand > 0) {
      await report(
        `排除 ${filtered.removedBand} 篇不在內容區間內的貼文(須呼應受眾問題或內容支柱,且未觸及排除項)`
      );
    }
    if (
      allowBrave &&
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      isBraveSearchConfigured()
    ) {
      // Too few posts survived the filters: top up through Brave, then filter again.
      const need = PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS - ranked.length;
      await runWebDiscover("Brave 網路搜尋(篩選後補充)", {
        summary: `篩選後僅 ${ranked.length} 篇,再補充網搜(最多 ${PLACEMENT_WEB_SEARCH_MAX_QUERIES} 次)…`,
        maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
        targetPosts: Math.min(PLACEMENT_WEB_SEARCH_TARGET_POSTS, need + 8),
        keywordPriority: braveKeywordPriority,
      });
      const refiltered = applyPlacementPostFilters(ranked, contentBand);
      ranked = refiltered.posts;
      if (refiltered.removedStale + refiltered.removedCasual + refiltered.removedBand > 0) {
        await report("網搜補充後已重新套用置入篩選");
      }
    } else if (
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      !isBraveSearchConfigured()
    ) {
      await report(
        `篩選後僅 ${ranked.length} 篇;未設定 BRAVE_SEARCH_API_KEY略過網搜補充`
      );
    }
  }

  if (ranked.length === 0) {
    throw new Error(
      placementMode
        ? `海巡未找到 ${PLACEMENT_MAX_POST_AGE_DAYS} 天內的適合貼文。建議:① 重新 AI 分析,加強「受眾會問什麼」與「內容支柱」② 到微調面板調整研究地圖 ③ 確認連線設定已開啟 Threads 搜尋`
        : "海巡未找到任何貼文。建議:① 重新 AI 分析取得短詞標籤 ② 勾選 @帳號 ③ 到微調面板補充相似帳號"
    );
  }
  // `ranked` also holds merged web posts, so its length alone cannot tell whether a
  // primary source (API / browser) contributed. Count it as zero when neither did,
  // which lets a web-only scan be stored as "web".
  const primaryCount = usedApi || usedBrowser ? ranked.length : 0;

  // Persist the scan and its items.
  progressDetail.phase = "save";
  progressDetail.summary = `找到 ${ranked.length} 篇,寫入資料庫…`;
  if (!progressDetail.tasks) progressDetail.tasks = [];
  progressDetail.tasks.push({ id: "save", label: "寫入資料庫", status: "running" });
  await report(progressDetail.summary, progressDetail);
  const scan = await prisma.scan.create({
    data: {
      accountId: topic.accountId,
      topicId: topic.id,
      scanMode,
      scanGoal: topic.topicGoal,
      scanTags: JSON.stringify(scanTags),
      searchSource: resolveSearchSource(primaryCount, webPostsFound, usedApi, usedBrowser),
    },
  });
  for (const item of ranked) {
    // Checked per item so a cancelled job stops writing promptly.
    await assertJobNotCancelled(jobId);
    await prisma.scanItem.create({
      data: {
        scanId: scan.id,
        externalId: item.externalId,
        text: item.text,
        permalink: item.permalink,
        authorName: item.authorName,
        postedAt: item.postedAt,
        likeCount: item.likeCount,
        replyCount: item.replyCount,
        score: item.score,
        searchTag: item.searchTag,
      },
    });
  }
  setTaskStatus(progressDetail, "save", { status: "done", found: ranked.length });

  // Fetch replies first (when a browser session exists) so users can review them and
  // later post/reply generation can use them as reference.
  // Without a browser, session is null: this step is skipped and the scan still completes.
  let repliesCount = 0;
  progressDetail.phase = "replies";
  progressDetail.tasks!.push({
    id: "replies",
    label: "抓取留言",
    status: "running",
    startedAt: Date.now(),
    step: "準備中",
  });
  if (needBrowserForReplies && session) {
    // Re-read the saved items to get their database ids for the reply rows.
    const saved = await prisma.scan.findUnique({
      where: { id: scan.id },
      include: { items: true },
    });
    const items = saved?.items ?? [];
    const replyTargets = [...items]
      .sort((a, b) => b.score - a.score)
      .filter((item) => item.permalink)
      .slice(0, REPLY_FETCH_TOP_N);
    if (replyTargets.length > 0) {
      progressDetail.summary = `抓取 Top ${replyTargets.length} 篇貼文留言…`;
      setTaskStatus(progressDetail, "replies", {
        status: "running",
        label: `抓取留言(${replyTargets.length} 篇貼文)`,
      });
      await report(progressDetail.summary, progressDetail);
      await assertJobNotCancelled(jobId);
      const permalinks = replyTargets.map((i) => i.permalink!);
      const repliesByUrl = await getRepliesParallel(
        session.storageState,
        permalinks,
        Math.min(connection.repliesPerPost, 5),
        {
          concurrency: getReplyFetchConcurrency(),
          session,
          onProgress: async (done, total, permalink) => {
            const short = permalink.split("/").slice(-2).join("/") || permalink;
            progressDetail.summary = `抓取留言 ${done}/${total}${short}`;
            setTaskStatus(progressDetail, "replies", {
              status: "running",
              step: `讀取貼文留言 ${done}/${total}`,
              stepDetail: short,
              startedAt: progressDetail.tasks?.find((t) => t.id === "replies")?.startedAt ?? Date.now(),
            });
            await report(progressDetail.summary, progressDetail);
          },
        }
      );
      for (const item of replyTargets) {
        await assertJobNotCancelled(jobId);
        const replies = repliesByUrl.get(item.permalink!) ?? [];
        for (const reply of replies) {
          await prisma.reply.create({
            data: {
              scanItemId: item.id,
              text: reply.text,
              authorName: reply.authorName,
              likeCount: reply.likeCount,
              postedAt: reply.postedAt,
            },
          });
          repliesCount += 1;
        }
      }
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言",
        found: repliesCount,
      });
      progressDetail.summary = `已抓 ${repliesCount} 則留言`;
      await report(progressDetail.summary, progressDetail);
    } else {
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言(無可抓貼文)",
        found: 0,
      });
      progressDetail.summary = "無可抓留言的貼文";
      await report(progressDetail.summary, progressDetail);
    }
  } else {
    // Explain why reply fetching was skipped.
    const skipLabel = connection.scrapeReplies && !connection.devMode
      ? "抓取留言(需 Dev 模式)"
      : needBrowserForReplies && !session
        ? "抓取留言(無瀏覽器登入)"
        : "抓取留言(已關閉)";
    setTaskStatus(progressDetail, "replies", {
      status: "done",
      label: skipLabel,
      found: 0,
    });
    progressDetail.summary = skipLabel;
    await report(progressDetail.summary, progressDetail);
  }
  await prisma.scan.update({
    where: { id: scan.id },
    data: { repliesFetched: repliesCount > 0, repliesCount },
  });

  // Quality pass, account enrichment and final report.
  const qualityLabel = "整理結果";
  progressDetail.phase = "quality";
  progressDetail.summary = `${qualityLabel}中…`;
  progressDetail.tasks!.push({ id: "quality", label: qualityLabel, status: "running" });
  await report(progressDetail.summary, progressDetail);
  await assertJobNotCancelled(jobId);
  await applyQualityFilter(scan.id);
  const visibleCount = await prisma.scanItem.count({
    where: { scanId: scan.id, OR: [{ qualityTier: null }, { qualityTier: { not: "EXCLUDE" } }] },
  });
  setTaskStatus(progressDetail, "quality", { status: "done", found: visibleCount });
  await enrichAccountsFromScan(scan.id, topic.id);
  progressDetail.summary = `完成 · ${ranked.length} 篇 · ${repliesCount} 則留言`;
  await report(progressDetail.summary, progressDetail);
  return prisma.scan.findUnique({
    where: { id: scan.id },
    include: {
      topic: true,
      items: {
        orderBy: [{ combinedScore: "desc" }, { score: "desc" }],
        include: { replies: { orderBy: { likeCount: "desc" } } },
      },
    },
  });
}
/**
 * Resets the quality fields of every item in a scan.
 *
 * In placement mode, items with a `postedAt` that is no longer fresh enough are marked
 * `EXCLUDE` with a combined score of 0. Every other item has its quality fields cleared
 * and its combined score set to its raw score. Items are updated one at a time.
 *
 * @param scanId - Id of the scan whose items are updated.
 * @returns Always an empty array.
 * @throws Error when the scan does not exist.
 */
export async function applyQualityFilter(scanId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: { topic: true, items: true },
  });
  if (!scan) throw new Error("找不到海巡紀錄");

  // The goal recorded on the scan wins; the topic's goal is the fallback.
  const placementMode = isPlacementGoal(scan.scanGoal ?? scan.topic.topicGoal);
  const staleReason = `貼文已超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天,不適合留言置入`;

  for (const entry of scan.items) {
    const stale = Boolean(
      placementMode && entry.postedAt && !isPostFreshEnough(entry.postedAt)
    );
    const data = stale
      ? {
          relevanceScore: null,
          placementScore: null,
          qualityTier: "EXCLUDE",
          qualityReason: staleReason,
          placementReason: staleReason,
          combinedScore: 0,
        }
      : {
          relevanceScore: null,
          placementScore: null,
          qualityTier: null,
          qualityReason: null,
          placementReason: null,
          combinedScore: entry.score,
        };
    await prisma.scanItem.update({ where: { id: entry.id }, data });
  }
  return [];
}
/**
 * Feeds the authors of a scan's best posts back into the topic's research map.
 *
 * Looks at the top 30% of non-excluded items (at least 3, ordered by combined score),
 * groups them by lower-cased author name, then:
 *  - upgrades `confidence` (when missing or "low") and refreshes `lastActiveAt` on
 *    accounts already present in `similarAccounts`;
 *  - adds authors not yet present as new accounts with source "scan".
 * The research map is written back whenever anything changed, including the case where
 * only existing accounts were updated. Does nothing when the scan has no visible items,
 * the topic is missing, or the topic has no parseable research map.
 *
 * @param scanId - Scan whose items are inspected.
 * @param topicId - Topic whose research map is updated.
 */
async function enrichAccountsFromScan(scanId: string, topicId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: {
      items: {
        where: { OR: [{ qualityTier: null }, { qualityTier: { not: "EXCLUDE" } }] },
        orderBy: { combinedScore: "desc" },
      },
    },
  });
  if (!scan || scan.items.length === 0) return;

  const cutoff = Math.max(3, Math.ceil(scan.items.length * 0.3));
  const topItems = scan.items.slice(0, cutoff);

  // latestPost stays null for authors without any dated post, so that no fabricated
  // epoch timestamp ends up in the research map.
  const authorMap = new Map<
    string,
    { count: number; maxScore: number; latestPost: Date | null; reason: string }
  >();
  for (const item of topItems) {
    if (!item.authorName) continue;
    const key = item.authorName.toLowerCase();
    const itemScore = item.combinedScore ?? item.score;
    const existing = authorMap.get(key);
    if (existing) {
      existing.count++;
      if (itemScore > existing.maxScore) {
        existing.maxScore = itemScore;
      }
      if (item.postedAt && (!existing.latestPost || item.postedAt > existing.latestPost)) {
        existing.latestPost = item.postedAt;
      }
    } else {
      authorMap.set(key, {
        count: 1,
        maxScore: itemScore,
        latestPost: item.postedAt ?? null,
        reason: item.qualityReason || item.text.slice(0, 80) || "海巡發現的高品質作者",
      });
    }
  }

  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) return;
  const existingMap = parseResearchMap(topic.researchMap);
  if (!existingMap) return;

  const existingAccounts = existingMap.similarAccounts ?? [];
  const existingByKey = new Map(existingAccounts.map((a) => [a.username.toLowerCase(), a]));
  const newAccounts: SimilarAccount[] = [];
  // Tracks in-place updates to existing accounts so they are persisted even when
  // the scan surfaced no new account.
  let existingChanged = false;
  for (const [key, data] of authorMap) {
    const existing = existingByKey.get(key);
    if (existing) {
      if (!existing.confidence || existing.confidence === "low") {
        existing.confidence = data.count >= 2 ? "high" : "medium";
        existingChanged = true;
      }
      if (data.latestPost && data.latestPost > new Date(existing.lastActiveAt ?? 0)) {
        existing.lastActiveAt = data.latestPost.toISOString();
        existingChanged = true;
      }
    } else {
      newAccounts.push({
        username: key,
        reason: data.reason,
        source: "scan",
        profileUrl: threadsProfileUrl(key) ?? undefined,
        confidence: data.count >= 2 ? "high" : "medium",
        lastActiveAt: data.latestPost?.toISOString(),
      });
    }
  }
  if (newAccounts.length === 0 && !existingChanged) return;

  // New accounts go first; existing ones follow, de-duplicated by lower-cased username.
  const merged = [...newAccounts, ...existingByKey.values()];
  await prisma.topic.update({
    where: { id: topicId },
    data: { researchMap: JSON.stringify({ ...existingMap, similarAccounts: merged }) },
  });
}
/**
 * Scans every active topic one after another, pausing 2000-4000 (as passed to
 * humanDelay) after each scan.
 *
 * @param accountId - When truthy, only topics of this account are scanned.
 * @returns The scan results in topic order.
 * @throws Whatever runScanForTopic throws; the first failure stops the remaining topics.
 */
export async function runScanForAllActiveTopics(accountId?: string | null) {
  const where = accountId ? { active: true, accountId } : { active: true };
  const activeTopics = await prisma.topic.findMany({ where });
  const results = [];
  for (const { id } of activeTopics) {
    results.push(await runScanForTopic(id));
    await humanDelay(2000, 4000);
  }
  return results;
}