haixunMaster/lib/services/scan.ts

1001 lines
36 KiB
TypeScript
Raw Permalink Normal View History

2026-06-21 12:50:31 +00:00
import { prisma } from "@/lib/db";
import { getActiveAccountConnectionSettings } from "@/lib/account-connection-settings";
import { isPlacementGoal } from "@/lib/types/topic-goal";
import { assertJobNotCancelled, isJobCancelled } from "@/lib/jobs/cancel";
import { initTaskProgress, setTaskStatus } from "@/lib/jobs/progress";
import { BROWSER_STEP_LABELS, type BrowserCrawlStep } from "@/lib/threads-browser/progress";
import { updateJobProgress } from "@/lib/jobs/progress-server";
import type { JobProgressDetail } from "@/lib/jobs/types";
import { ensureActiveSession } from "@/lib/threads-browser";
import { executeScanTasks, search } from "@/lib/threads-browser/search";
import { getReplyFetchConcurrency } from "@/lib/threads-browser/human-behavior";
import { getRepliesParallel } from "@/lib/threads-browser/replies";
import { keywordSearchViaThreadsApi } from "@/lib/threads-api";
import { getActiveThreadsCredentials } from "@/lib/services/threads-credentials";
import { computePlacementScore, type RankedPost } from "@/lib/ranking";
2026-06-21 16:28:26 +00:00
import { parseResearchMap, parseSelectedTags, threadsProfileUrl, type SimilarAccount } from "@/lib/types/research";
2026-06-21 12:50:31 +00:00
import { humanDelay } from "@/lib/utils";
import { runWithConcurrency } from "@/lib/utils/concurrency";
import {
contentBandFromResearchMap,
isInContentBand,
} from "@/lib/research-content-band";
import { hasPlacementIntent, looksLikeCasualChat } from "@/lib/topic-anchor";
import {
isPostFreshEnough,
PLACEMENT_MAX_POST_AGE_DAYS,
PLACEMENT_WEB_SEARCH_MAX_QUERIES,
PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS,
PLACEMENT_WEB_SEARCH_TARGET_POSTS,
} from "@/lib/scan-recency";
import { isBraveSearchConfigured } from "@/lib/services/web-search";
import {
modeAllowsBrave,
modeAllowsCrawler,
modeAllowsThreads,
searchSourceModeLabel,
} from "@/lib/search/source-mode";
import {
buildPlacementScanTasks,
buildScanTasks,
getSelectedAccountUsernames,
hasPlacementSearchSources,
hasSelectedAccountTags,
resolvePlacementSearchQueries,
resolveScanConcurrency,
splitScanTasks,
} from "./scan-tasks";
import {
discoverPostsFromSimilarAccounts,
discoverPostsViaWebSearch,
mergeScanPosts,
tagPostSource,
} from "./scan-web-discover";
/** A ranked post plus the optional label of the search task/tag that produced it. */
type RankedPostWithTag = RankedPost & { searchTag?: string };
/** Number of highest-scoring saved posts whose replies are fetched after a scan. */
const REPLY_FETCH_TOP_N = 4;
/** Limit passed to `mergeScanPosts` on every merge (presumably the max merged list size — confirm in scan-web-discover). */
const MAX_MERGED_POSTS = 90;
/**
 * Recomputes every post's `score` with `computePlacementScore` when the scan
 * runs in placement mode; otherwise hands back the input array untouched.
 *
 * @param posts - Posts to (optionally) rescore.
 * @param placementMode - Whether the placement scoring should be applied.
 * @returns The same array instance when `placementMode` is false, else a new
 *          array of shallow copies carrying the recomputed score.
 */
function rescoreForPlacement(posts: RankedPostWithTag[], placementMode: boolean): RankedPostWithTag[] {
  if (placementMode) {
    return posts.map((entry) => {
      const score = computePlacementScore(entry);
      return { ...entry, score };
    });
  }
  return posts;
}
/**
 * Applies the placement-mode filters in a fixed order and reports how many
 * posts each stage dropped:
 *   1. freshness (`isPostFreshEnough` on `postedAt`),
 *   2. intent (`hasPlacementIntent` and not `looksLikeCasualChat`),
 *   3. content band (`isInContentBand`), only when a band is provided.
 *
 * @param posts - Candidate posts; the array itself is not mutated.
 * @param contentBand - Band derived from the research map, or a falsy value to
 *                      skip the band stage.
 * @returns The surviving posts plus per-stage removal counts.
 */
function applyPlacementPostFilters(
  posts: RankedPostWithTag[],
  contentBand: ReturnType<typeof contentBandFromResearchMap>
): { posts: RankedPostWithTag[]; removedStale: number; removedCasual: number; removedBand: number } {
  const freshPosts = posts.filter((candidate) => isPostFreshEnough(candidate.postedAt));

  const intentPosts = freshPosts.filter((candidate) => {
    if (!hasPlacementIntent(candidate.text)) return false;
    return !looksLikeCasualChat(candidate.text);
  });

  // Bind to a const so the truthiness narrowing survives inside the callback.
  const band = contentBand;
  const bandPosts = band
    ? intentPosts.filter((candidate) => isInContentBand(candidate.text, band))
    : intentPosts;

  return {
    posts: bandPosts,
    removedStale: posts.length - freshPosts.length,
    removedCasual: freshPosts.length - intentPosts.length,
    removedBand: intentPosts.length - bandPosts.length,
  };
}
/**
 * Picks the research map's similar accounts that the user selected via tags.
 *
 * @param researchMap - Parsed research map (may be null/undefined).
 * @param selectedTags - Currently selected tags; account usernames are
 *                       extracted from them with `getSelectedAccountUsernames`.
 * @returns Map keyed by lower-cased, trimmed username. Empty when no account
 *          tag is selected.
 */
function collectWebAccountTargets(
  researchMap: ReturnType<typeof parseResearchMap>,
  selectedTags: string[]
): Map<string, { username: string; reason: string; postUrl?: string }> {
  const targets = new Map<string, { username: string; reason: string; postUrl?: string }>();
  if (!hasSelectedAccountTags(selectedTags)) return targets;

  const wanted = new Set<string>();
  for (const name of getSelectedAccountUsernames(selectedTags)) {
    wanted.add(name.toLowerCase());
  }

  const candidates = researchMap?.similarAccounts ?? [];
  candidates.forEach((candidate) => {
    const key = candidate.username.trim().toLowerCase();
    if (key !== "" && wanted.has(key)) {
      targets.set(key, candidate);
    }
  });
  return targets;
}
/**
 * Derives the `searchSource` label stored on a scan.
 *
 * @param primaryCount - Number of posts the caller considers "primary". The
 *                       caller in this file passes the final merged count,
 *                       which is never 0 at that point.
 * @param webCount - Number of posts discovered through web search.
 * @param usedApi - Whether the official API contributed results.
 * @param usedBrowser - Whether the browser crawler contributed results.
 * @returns `"web"` when web search is the only contributor, `"hybrid"` when
 *          web search or both API and browser contributed, otherwise `"api"`
 *          or `"browser"`.
 */
function resolveSearchSource(
  primaryCount: number,
  webCount: number,
  usedApi: boolean,
  usedBrowser: boolean
): string {
  const webContributed = webCount > 0;
  // Relying on `primaryCount === 0` alone never fires for a merged count, so a
  // web-only scan was reported as "hybrid". Also treat "no API, no browser" as
  // web-only.
  const webOnly = primaryCount === 0 || (!usedApi && !usedBrowser);
  if (webContributed && webOnly) return "web";
  if (webContributed || (usedApi && usedBrowser)) return "hybrid";
  return usedApi ? "api" : "browser";
}
/**
 * Callback invoked with a progress message while a scan runs.
 * It may be synchronous or return a promise; `runScanForTopic` awaits it.
 */
export interface ScanProgressCallback {
(message: string): void | Promise<void>;
}
/**
 * Stores a progress snapshot for a job via `updateJobProgress`.
 * Does nothing when no job id is supplied.
 */
async function persistProgress(jobId: string | undefined, detail: JobProgressDetail) {
  if (jobId) {
    await updateJobProgress(jobId, detail);
  }
}
/**
 * Runs one scan for a topic and returns the stored scan with its items and replies.
 *
 * Order of work, as implemented below:
 *   1. Load the topic and the active connection settings; derive which sources
 *      (Threads API / Brave / crawler) the search-source mode allows.
 *   2. Build scan tasks (placement tasks, multi-tag tasks, or none).
 *   3. Brave-only discovery when Brave is allowed and Threads is not.
 *   4. Official API keyword search when credentials are available.
 *   5. Placement mode only: Brave supplement when results are below the threshold.
 *   6. Browser crawl (requires crawler mode and Dev mode), or a single-query
 *      browser search as a fallback.
 *   7. Placement mode only: apply placement filters, optionally refill via Brave
 *      and filter again.
 *   8. Persist the scan and its items, fetch replies for the top posts when a
 *      browser session is available, run `applyQualityFilter`, then
 *      `enrichAccountsFromScan`.
 *
 * @param topicId - Id of the topic to scan.
 * @param options - Optional overrides: `useTags` (set `false` to disable
 *   multi-tag scanning), `selectedTags` (overrides the topic's stored tags when
 *   non-empty), `jobId` (enables cancellation checks and persisted progress),
 *   `onProgress` (progress message callback).
 * @returns The scan loaded with `topic`, `items` and each item's `replies`.
 * @throws Error when the topic does not exist, when placement mode has no
 *   search sources, when the selected tags yield no tasks, when keyword results
 *   are insufficient and no browser run is possible, or when no post is found.
 *   Whatever `assertJobNotCancelled` throws is propagated as well.
 */
export async function runScanForTopic(
  topicId: string,
  options?: {
    useTags?: boolean;
    selectedTags?: string[];
    jobId?: string;
    onProgress?: ScanProgressCallback;
  }
) {
  const jobId = options?.jobId;
  // Forwards the message to the caller's callback; the detail snapshot is only
  // persisted when both a snapshot and a job id are present.
  const report = async (msg: string, detail?: JobProgressDetail) => {
    await options?.onProgress?.(msg);
    if (detail && jobId) {
      await persistProgress(jobId, detail);
    }
  };
  await assertJobNotCancelled(jobId);
  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) throw new Error("找不到主題");
  const topicQuery: string = topic.query;
  const placementMode = isPlacementGoal(topic.topicGoal);
  const connection = await getActiveAccountConnectionSettings();
  const sourceMode = connection.searchSourceMode;
  const allowThreads = modeAllowsThreads(sourceMode);
  const allowBrave = modeAllowsBrave(sourceMode);
  const allowCrawler = modeAllowsCrawler(sourceMode);
  const braveKeywordPriority =
    placementMode || sourceMode === "brave" || sourceMode === "brave_crawler"
      ? ("high" as const)
      : ("medium" as const);
  await report(`海巡搜尋來源:${searchSourceModeLabel(sourceMode)}`);
  // API first: when the account is connected to the official API, scan through
  // the API first; fall back to the browser crawler only when it throws or
  // returns nothing.
  const apiCredentials =
    allowThreads && connection.searchViaApi
      ? await getActiveThreadsCredentials().catch(() => null)
      : null;
  // Crawling posts/replies is only allowed while Dev mode is on; when it is off
  // everything goes through the official API.
  // Note: the official API cannot read replies under other people's posts, so
  // fetching reply material still needs a browser session (i.e. Dev mode).
  const needBrowserForReplies = connection.devMode && connection.scrapeReplies;
  const researchMap = parseResearchMap(topic.researchMap);
  const selectedTags =
    options?.selectedTags && options.selectedTags.length > 0
      ? options.selectedTags
      : parseSelectedTags(topic.selectedTags);
  const useMultiTag =
    !placementMode && options?.useTags !== false && selectedTags.length > 0;
  const placementSearchQueries = placementMode
    ? resolvePlacementSearchQueries(researchMap, topicQuery, selectedTags)
    : [];
  let ranked: RankedPostWithTag[] = [];
  let scanTags: string[] = [];
  let scanMode = "single";
  let progressDetail: JobProgressDetail = { summary: "準備海巡…", phase: "tasks", tasks: [] };
  let session: Awaited<ReturnType<typeof ensureActiveSession>> | null = null;
  if (placementMode && !hasPlacementSearchSources(researchMap, topicQuery)) {
    throw new Error(
      "請先完成 AI 分析,產出「受眾會問什麼」與「內容支柱」後再海巡"
    );
  }
  const builtTasks = placementMode
    ? buildPlacementScanTasks({ researchMap, seedQuery: topicQuery, selectedTags })
    : useMultiTag
      ? buildScanTasks({
          selectedTags,
          researchMap,
          seedQuery: topic.query,
          topicGoal: topic.topicGoal,
          topicLabel: topic.label,
        })
      : [];
  const allTasks = builtTasks;
  const { keywordTasks, accountTasks } = splitScanTasks(allTasks);
  const contentBand = placementMode ? contentBandFromResearchMap(researchMap) : null;
  if (!placementMode && useMultiTag && selectedTags.length > 0 && builtTasks.length === 0) {
    throw new Error("勾選的標籤均無法用於海巡,請調整選擇後再試");
  }
  const webSearchTags = placementMode
    ? placementSearchQueries
    : useMultiTag && selectedTags.length > 0
      ? selectedTags
      : [topicQuery];
  const accountTargets = collectWebAccountTargets(researchMap, selectedTags);
  // Every post found through web search during this scan, across all
  // runWebDiscover calls; its length feeds resolveSearchSource when saving.
  let webPosts: Awaited<ReturnType<typeof discoverPostsViaWebSearch>> = [];

  /**
   * Runs web discovery (keyword search plus, outside placement mode, the
   * selected similar accounts), merges the findings into `ranked` and keeps the
   * "web" progress task up to date. Failures are reported and swallowed so the
   * scan continues with what it already has.
   *
   * @returns Number of posts discovered by this call (0 on failure).
   */
  async function runWebDiscover(
    taskLabel: string,
    discoverOptions?: {
      summary?: string;
      maxQueries?: number;
      targetPosts?: number;
      braveQueryBudget?: number;
      keywordPriority?: "high" | "medium" | "low";
    }
  ): Promise<number> {
    progressDetail.phase = "web";
    if (!progressDetail.tasks) progressDetail.tasks = [];
    if (!progressDetail.tasks.some((t) => t.id === "web")) {
      progressDetail.tasks.push({ id: "web", label: taskLabel, status: "running" });
    } else {
      setTaskStatus(progressDetail, "web", { status: "running", label: taskLabel });
    }
    progressDetail.summary = discoverOptions?.summary ?? taskLabel;
    await report(progressDetail.summary, progressDetail);
    // Honor the caller's priority; fall back to the scan-wide default.
    const keywordPriority = discoverOptions?.keywordPriority ?? braveKeywordPriority;
    try {
      await assertJobNotCancelled(jobId);
      const tagMeta = new Map(
        (researchMap?.suggestedTags ?? []).map((t) => [
          t.tag,
          { searchIntent: t.searchIntent, searchType: t.searchType },
        ])
      );
      const accountWebTargets = [...accountTargets.values()];
      const [keywordWebPosts, accountWebPosts] = await Promise.all([
        discoverPostsViaWebSearch(webSearchTags, {
          perQueryLimit: 15,
          placementMode,
          concurrency: 2,
          tagMeta,
          contentBand: contentBand ?? undefined,
          maxQueries: discoverOptions?.maxQueries,
          targetPosts: discoverOptions?.targetPosts,
          braveQueryBudget: discoverOptions?.braveQueryBudget,
          keywordPriority,
          onProgress: async (msg) => {
            progressDetail.summary = msg;
            await report(msg, progressDetail);
          },
        }),
        !placementMode && accountWebTargets.length > 0
          ? discoverPostsFromSimilarAccounts(accountWebTargets.slice(0, 4), {
              perAccountLimit: 20,
              placementMode,
              keywordPriority,
            })
          : Promise.resolve([]),
      ]);
      const discovered = [...accountWebPosts, ...keywordWebPosts];
      // Accumulate instead of overwrite, so an empty later pass does not erase
      // the fact that an earlier pass contributed web posts.
      webPosts = [...webPosts, ...discovered];
      const beforeMerge = ranked.length;
      ranked = mergeScanPosts(ranked, discovered, MAX_MERGED_POSTS);
      const added = ranked.length - beforeMerge;
      setTaskStatus(progressDetail, "web", { status: "done", found: discovered.length });
      const sourceDetail = placementMode
        ? `關鍵字 ${keywordWebPosts.length}`
        : `相似帳號 ${accountWebPosts.length}、關鍵字 ${keywordWebPosts.length}`;
      progressDetail.summary =
        discovered.length > 0
          ? `${taskLabel}完成:${discovered.length} 篇(${sourceDetail}),合併後共 ${ranked.length} 篇(新增 ${added} 篇)`
          : `${taskLabel}:無額外結果,保留 ${ranked.length}`;
      await report(progressDetail.summary, progressDetail);
      return discovered.length;
    } catch (error) {
      setTaskStatus(progressDetail, "web", {
        status: "failed",
        error: error instanceof Error ? error.message : "網路搜尋失敗",
      });
      await report(
        `${taskLabel}失敗:${error instanceof Error ? error.message : "未知錯誤"},沿用既有 ${ranked.length}`,
        progressDetail
      );
      return 0;
    }
  }

  /**
   * Searches through the official API. With tasks, each task runs with bounded
   * concurrency, a failing task yields no posts, and results are merged into
   * `ranked`. Without tasks, a single search on the topic query replaces
   * `ranked`.
   *
   * @returns Whether the API produced at least one post.
   */
  async function runApiKeywordSearch(
    credentials: NonNullable<typeof apiCredentials>,
    tasks: typeof keywordTasks
  ): Promise<boolean> {
    if (tasks.length > 0) {
      if (!placementMode) {
        scanMode = "multi-tag";
        scanTags = allTasks.map((t) => t.label);
      }
      const concurrency = resolveScanConcurrency(tasks.length);
      progressDetail = {
        summary: `API 海巡 0/${tasks.length}${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
      const batches = await runWithConcurrency(
        tasks,
        async (task) => {
          setTaskStatus(progressDetail, task.id, { status: "running" });
          await report(`API 海巡:${task.label}`, progressDetail);
          try {
            const found = await keywordSearchViaThreadsApi(credentials, {
              query: task.query,
              limit: task.limit,
              searchType: placementMode ? "RECENT" : "TOP",
            });
            setTaskStatus(progressDetail, task.id, { status: "done", found: found.length });
            return found.map((post) =>
              tagPostSource({ ...post, searchTag: task.label }, "keyword")
            );
          } catch (error) {
            // One failing task must not abort the others; mark it and move on.
            setTaskStatus(progressDetail, task.id, {
              status: "failed",
              error: error instanceof Error ? error.message : "API 海巡失敗",
            });
            return [];
          }
        },
        {
          concurrency,
          shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
          onProgress: async (done, total) => {
            progressDetail.summary = `API 海巡 ${done}/${total}`;
            await report(progressDetail.summary, progressDetail);
          },
        }
      );
      const apiPosts = batches.flat();
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(apiPosts, placementMode),
        MAX_MERGED_POSTS
      );
      return apiPosts.length > 0;
    }
    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `API 搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [{ id: "single", label: topicQuery, status: "running" }],
    };
    await report(progressDetail.summary, progressDetail);
    const posts = await keywordSearchViaThreadsApi(credentials, {
      query: topicQuery,
      limit: 20,
      searchType: placementMode ? "RECENT" : "TOP",
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length}`;
    await report(progressDetail.summary, progressDetail);
    return ranked.length > 0;
  }

  /** Records a browser crawl step on a task, logs it and reports the new summary. */
  async function reportBrowserStep(
    taskId: string,
    step: BrowserCrawlStep,
    detail?: string,
    summaryPrefix?: string
  ) {
    const stepLabel = BROWSER_STEP_LABELS[step] ?? step;
    setTaskStatus(progressDetail, taskId, {
      step: stepLabel,
      stepDetail: detail,
    });
    progressDetail.summary = summaryPrefix
      ? `${summaryPrefix} · ${stepLabel}${detail ? `${detail}` : ""}`
      : `${stepLabel}${detail ? `${detail}` : ""}`;
    console.log(`[scan-crawler] task=${taskId} ${progressDetail.summary}`);
    await report(progressDetail.summary, progressDetail);
  }

  /**
   * Executes the given tasks through the browser session and tags each returned
   * post as coming from an account task or a keyword task.
   *
   * @returns The tagged posts; an empty array when there are no tasks.
   */
  async function runBrowserTaskSearch(
    activeSession: NonNullable<typeof session>,
    tasks: typeof allTasks,
    label: string
  ): Promise<RankedPostWithTag[]> {
    if (tasks.length === 0) return [];
    const concurrency = resolveScanConcurrency(tasks.length);
    if (!progressDetail.tasks || progressDetail.tasks.length === 0) {
      progressDetail = {
        summary: `${label} 0/${tasks.length}${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
    } else {
      // Keep the entries from earlier phases and append only the missing tasks.
      for (const task of tasks) {
        if (!progressDetail.tasks!.some((t) => t.id === task.id)) {
          progressDetail.tasks!.push({ id: task.id, label: task.label, status: "pending" });
        }
      }
    }
    const posts = await executeScanTasks(activeSession.storageState, tasks, {
      session: activeSession,
      concurrency,
      shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
      onTaskStart: async (task) => {
        setTaskStatus(progressDetail, task.id, {
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        });
        const running = progressDetail.tasks?.filter((t) => t.status === "running").length ?? 0;
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}${running} 進行中)· ${task.label}`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskStep: async (task, step, detail) => {
        await reportBrowserStep(
          task.id,
          step,
          detail,
          `${label} ${task.label}`
        );
      },
      onTaskDone: async (task, found) => {
        setTaskStatus(progressDetail, task.id, {
          status: "done",
          found,
          step: undefined,
          stepDetail: undefined,
        });
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}${task.label}${found} 篇)`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskFail: async (task, error) => {
        setTaskStatus(progressDetail, task.id, {
          status: "failed",
          error: error instanceof Error ? error.message : "失敗",
          step: undefined,
        });
        // The summary text is left as-is; only the task status changes here.
        await report(progressDetail.summary, progressDetail);
      },
      onProgress: async (done, total, taskLabel) => {
        progressDetail.summary = `${label} ${done}/${total}${taskLabel}`;
        await report(progressDetail.summary, progressDetail);
      },
    });
    return posts.map((post) =>
      tagPostSource(
        post,
        tasks.find((t) => t.label === post.searchTag)?.kind === "account" ? "account" : "keyword"
      )
    );
  }

  let usedApi = false;
  let usedBrowser = false;
  // Brave-only modes: Threads is not allowed, so web search is the primary source.
  if (allowBrave && !allowThreads && isBraveSearchConfigured()) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `以 Brave Search 海巡(${searchSourceModeLabel(sourceMode)})…`,
      maxQueries: placementMode ? PLACEMENT_WEB_SEARCH_MAX_QUERIES : 12,
      targetPosts: placementMode ? PLACEMENT_WEB_SEARCH_TARGET_POSTS : 25,
      keywordPriority: braveKeywordPriority,
    });
  }
  if (apiCredentials) {
    try {
      if (keywordTasks.length > 0) {
        usedApi = await runApiKeywordSearch(apiCredentials, keywordTasks);
      } else if (!placementMode) {
        usedApi = await runApiKeywordSearch(apiCredentials, []);
      }
      if (!usedApi && keywordTasks.length > 0) {
        await report(
          isBraveSearchConfigured()
            ? "官方 API 關鍵字海巡沒有結果,改用 Brave Search…"
            : "官方 API 關鍵字海巡沒有結果,改用瀏覽器…"
        );
      }
    } catch (error) {
      // An API failure is not fatal; later stages may still find posts.
      usedApi = false;
      await report(
        `官方 API 海巡失敗:${error instanceof Error ? error.message : "未知錯誤"}`
      );
    }
  }
  const supplementThreshold = placementMode ? PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS : 8;
  if (
    allowBrave &&
    allowThreads &&
    placementMode &&
    keywordTasks.length > 0 &&
    ranked.length < supplementThreshold &&
    isBraveSearchConfigured()
  ) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `Threads API 結果不足(${ranked.length} 篇),以 Brave Search 補充…`,
      maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
      targetPosts: PLACEMENT_WEB_SEARCH_TARGET_POSTS,
      keywordPriority: "high",
    });
  }
  const needBrowserKeywords =
    allowCrawler && keywordTasks.length > 0 && ranked.length < supplementThreshold;
  const needBrowserAccounts =
    allowCrawler && !placementMode && accountTasks.length > 0 && connection.devMode;
  if ((needBrowserKeywords || needBrowserAccounts) && allowCrawler && connection.devMode) {
    session = await ensureActiveSession();
    if (placementMode) {
      scanMode = "placement-auto";
      scanTags = keywordTasks.map((t) => t.label);
    } else if (useMultiTag) {
      scanMode = "multi-tag";
      scanTags = allTasks.map((t) => t.label);
    }
    if (needBrowserKeywords) {
      const keywordPosts = await runBrowserTaskSearch(session, keywordTasks, "關鍵字海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(keywordPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = keywordPosts.length > 0 || usedBrowser;
    }
    if (needBrowserAccounts) {
      const accountPosts = await runBrowserTaskSearch(session, accountTasks, "帳號海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(accountPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = accountPosts.length > 0 || usedBrowser;
    }
  } else if (needBrowserKeywords) {
    // Keyword results are still short, the crawler is allowed, but Dev mode is
    // off (otherwise the branch above would have run): pick the most specific
    // error message for the current configuration.
    if (sourceMode === "threads" || sourceMode === "threads_brave") {
      throw new Error(
        `海巡來源為「${searchSourceModeLabel(sourceMode)}」,未啟用瀏覽器補漏。請到連線設定改為混合模式或開啟爬蟲,並確認 Threads API 有足夠結果。`
      );
    }
    if (!connection.searchViaApi && !allowCrawler) {
      throw new Error(
        "未開啟任何海巡方式:請到「連線設定」選擇搜尋來源,並確認 Chrome 同步或官方 API 已就緒。"
      );
    }
    if (connection.searchViaApi && !apiCredentials) {
      throw new Error(
        "已開啟官方 API 海巡,但此帳號尚未連線官方 API。請到連線設定頁綁定 OAuth或改為 Chrome 同步。"
      );
    }
    throw new Error(
      `目前來源「${searchSourceModeLabel(sourceMode)}」沒有可用結果。請調整連線設定或確認 threads_keyword_search 權限已通過 Meta 審核。`
    );
  } else if (!useMultiTag && !usedApi && allowCrawler && connection.devMode) {
    // Single-query browser fallback: no multi-tag tasks and the API gave nothing.
    session = await ensureActiveSession();
    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `瀏覽器搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [
        {
          id: "single",
          label: topicQuery,
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        },
      ],
    };
    await report(progressDetail.summary, progressDetail);
    const posts = await search(session.storageState, topicQuery, 20, session, {
      onStep: async (step, detail) => {
        await reportBrowserStep("single", step, detail, `瀏覽器搜尋「${topicQuery}`);
      },
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length}`;
    await report(progressDetail.summary, progressDetail);
    usedBrowser = ranked.length > 0;
  }
  if (needBrowserForReplies && !session) {
    // The API already found posts, but fetching replies under other people's
    // posts needs a browser.
    try {
      session = await ensureActiveSession();
    } catch {
      session = null;
      await report("沒有可用的瀏覽器登入,這次只用官方 API 結果,略過留言抓取。");
    }
  }
  await assertJobNotCancelled(jobId);
  if (placementMode) {
    scanMode = "placement-auto";
    scanTags = placementSearchQueries;
    const filtered = applyPlacementPostFilters(ranked, contentBand);
    ranked = filtered.posts;
    if (filtered.removedStale > 0) {
      await report(
        `排除 ${filtered.removedStale} 篇超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天的舊貼文,保留近期可置入的貼文`
      );
    }
    if (filtered.removedCasual > 0) {
      await report(
        `排除 ${filtered.removedCasual} 篇閒聊/無求助訊號的貼文,保留有痛點或求推薦的內容`
      );
    }
    if (filtered.removedBand > 0) {
      await report(
        `排除 ${filtered.removedBand} 篇不在內容區間內的貼文(須呼應受眾問題或內容支柱,且未觸及排除項)`
      );
    }
    if (
      allowBrave &&
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      isBraveSearchConfigured()
    ) {
      // Filtering left too few posts: refill through web search, then filter
      // again because the refill is unfiltered.
      const need = PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS - ranked.length;
      await runWebDiscover("Brave 網路搜尋(篩選後補充)", {
        summary: `篩選後僅 ${ranked.length} 篇,再補充網搜(最多 ${PLACEMENT_WEB_SEARCH_MAX_QUERIES} 次)…`,
        maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
        targetPosts: Math.min(PLACEMENT_WEB_SEARCH_TARGET_POSTS, need + 8),
        keywordPriority: braveKeywordPriority,
      });
      const refiltered = applyPlacementPostFilters(ranked, contentBand);
      ranked = refiltered.posts;
      if (refiltered.removedStale + refiltered.removedCasual + refiltered.removedBand > 0) {
        await report("網搜補充後已重新套用置入篩選");
      }
    } else if (
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      !isBraveSearchConfigured()
    ) {
      await report(
        `篩選後僅 ${ranked.length} 篇;未設定 BRAVE_SEARCH_API_KEY略過網搜補充`
      );
    }
  }
  const primaryCount = ranked.length;
  if (ranked.length === 0) {
    throw new Error(
      placementMode
        ? `海巡未找到 ${PLACEMENT_MAX_POST_AGE_DAYS} 天內的適合貼文。建議:① 重新 AI 分析,加強「受眾會問什麼」與「內容支柱」② 到微調面板調整研究地圖 ③ 確認連線設定已開啟 Threads 搜尋`
        : "海巡未找到任何貼文。建議:① 重新 AI 分析取得短詞標籤 ② 勾選 @帳號 ③ 到微調面板補充相似帳號"
    );
  }
  progressDetail.phase = "save";
  progressDetail.summary = `找到 ${ranked.length} 篇,寫入資料庫…`;
  if (!progressDetail.tasks) progressDetail.tasks = [];
  progressDetail.tasks.push({ id: "save", label: "寫入資料庫", status: "running" });
  await report(progressDetail.summary, progressDetail);
  const scan = await prisma.scan.create({
    data: {
      accountId: topic.accountId,
      topicId: topic.id,
      scanMode,
      scanGoal: topic.topicGoal,
      scanTags: JSON.stringify(scanTags),
      searchSource: resolveSearchSource(primaryCount, webPosts.length, usedApi, usedBrowser),
    },
  });
  // Items are written one by one so a cancellation is noticed between writes.
  for (const item of ranked) {
    await assertJobNotCancelled(jobId);
    await prisma.scanItem.create({
      data: {
        scanId: scan.id,
        externalId: item.externalId,
        text: item.text,
        permalink: item.permalink,
        authorName: item.authorName,
        postedAt: item.postedAt,
        likeCount: item.likeCount,
        replyCount: item.replyCount,
        score: item.score,
        searchTag: item.searchTag,
      },
    });
  }
  setTaskStatus(progressDetail, "save", { status: "done", found: ranked.length });
  // Fetch replies first (when a browser session exists) so users can review
  // them and later post/reply generation can use them as reference.
  // Without a browser, session is null and this step is skipped; the scan still
  // completes normally.
  let repliesCount = 0;
  progressDetail.phase = "replies";
  progressDetail.tasks!.push({
    id: "replies",
    label: "抓取留言",
    status: "running",
    startedAt: Date.now(),
    step: "準備中",
  });
  if (needBrowserForReplies && session) {
    const saved = await prisma.scan.findUnique({
      where: { id: scan.id },
      include: { items: true },
    });
    const items = saved?.items ?? [];
    // Highest-scoring items that have a permalink, capped at REPLY_FETCH_TOP_N.
    const replyTargets = [...items]
      .sort((a, b) => b.score - a.score)
      .filter((item) => item.permalink)
      .slice(0, REPLY_FETCH_TOP_N);
    if (replyTargets.length > 0) {
      progressDetail.summary = `抓取 Top ${replyTargets.length} 篇貼文留言…`;
      setTaskStatus(progressDetail, "replies", {
        status: "running",
        label: `抓取留言(${replyTargets.length} 篇貼文)`,
      });
      await report(progressDetail.summary, progressDetail);
      await assertJobNotCancelled(jobId);
      const permalinks = replyTargets.map((i) => i.permalink!);
      const repliesByUrl = await getRepliesParallel(
        session.storageState,
        permalinks,
        Math.min(connection.repliesPerPost, 5),
        {
          concurrency: getReplyFetchConcurrency(),
          session,
          onProgress: async (done, total, permalink) => {
            const short = permalink.split("/").slice(-2).join("/") || permalink;
            progressDetail.summary = `抓取留言 ${done}/${total}${short}`;
            setTaskStatus(progressDetail, "replies", {
              status: "running",
              step: `讀取貼文留言 ${done}/${total}`,
              stepDetail: short,
              // Preserve the original start time across progress updates.
              startedAt: progressDetail.tasks?.find((t) => t.id === "replies")?.startedAt ?? Date.now(),
            });
            await report(progressDetail.summary, progressDetail);
          },
        }
      );
      for (const item of replyTargets) {
        await assertJobNotCancelled(jobId);
        const replies = repliesByUrl.get(item.permalink!) ?? [];
        for (const reply of replies) {
          await prisma.reply.create({
            data: {
              scanItemId: item.id,
              text: reply.text,
              authorName: reply.authorName,
              likeCount: reply.likeCount,
              postedAt: reply.postedAt,
            },
          });
          repliesCount += 1;
        }
      }
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言",
        found: repliesCount,
      });
      progressDetail.summary = `已抓 ${repliesCount} 則留言`;
      await report(progressDetail.summary, progressDetail);
    } else {
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言(無可抓貼文)",
        found: 0,
      });
      progressDetail.summary = "無可抓留言的貼文";
      await report(progressDetail.summary, progressDetail);
    }
  } else {
    // Explain why the reply step was skipped.
    const skipLabel = connection.scrapeReplies && !connection.devMode
      ? "抓取留言(需 Dev 模式)"
      : needBrowserForReplies && !session
        ? "抓取留言(無瀏覽器登入)"
        : "抓取留言(已關閉)";
    setTaskStatus(progressDetail, "replies", {
      status: "done",
      label: skipLabel,
      found: 0,
    });
    progressDetail.summary = skipLabel;
    await report(progressDetail.summary, progressDetail);
  }
  await prisma.scan.update({
    where: { id: scan.id },
    data: { repliesFetched: repliesCount > 0, repliesCount },
  });
  const qualityLabel = "整理結果";
  progressDetail.phase = "quality";
  progressDetail.summary = `${qualityLabel}中…`;
  progressDetail.tasks!.push({ id: "quality", label: qualityLabel, status: "running" });
  await report(progressDetail.summary, progressDetail);
  await assertJobNotCancelled(jobId);
  await applyQualityFilter(scan.id);
  // Count the items that were not excluded by the quality filter.
  const visibleCount = await prisma.scanItem.count({
    where: { scanId: scan.id, OR: [{ qualityTier: null }, { qualityTier: { not: "EXCLUDE" } }] },
  });
  setTaskStatus(progressDetail, "quality", { status: "done", found: visibleCount });
  await enrichAccountsFromScan(scan.id, topic.id);
  progressDetail.summary = `完成 · ${ranked.length} 篇 · ${repliesCount} 則留言`;
  await report(progressDetail.summary, progressDetail);
  return prisma.scan.findUnique({
    where: { id: scan.id },
    include: {
      topic: true,
      items: {
        orderBy: [{ combinedScore: "desc" }, { score: "desc" }],
        include: { replies: { orderBy: { likeCount: "desc" } } },
      },
    },
  });
}
/**
 * Resets the quality fields of every item in a scan.
 *
 * For a placement-goal scan, items whose `postedAt` is set and fails
 * `isPostFreshEnough` are marked `EXCLUDE` with a combined score of 0. Every
 * other item has its quality fields cleared and `combinedScore` set to its
 * raw `score`. Items are updated one after another.
 *
 * @param scanId - Id of the scan to process.
 * @returns Always an empty array.
 * @throws Error when the scan does not exist.
 */
export async function applyQualityFilter(scanId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: { topic: true, items: true },
  });
  if (!scan) throw new Error("找不到海巡紀錄");

  const goal = scan.scanGoal ?? scan.topic.topicGoal;
  const placementMode = isPlacementGoal(goal);

  for (const entry of scan.items) {
    // Freshness is only evaluated for placement scans with a known post date.
    const stale = placementMode && entry.postedAt && !isPostFreshEnough(entry.postedAt);
    const data = stale
      ? {
          relevanceScore: null,
          placementScore: null,
          qualityTier: "EXCLUDE",
          qualityReason: `貼文已超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天,不適合留言置入`,
          placementReason: `貼文已超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天,不適合留言置入`,
          combinedScore: 0,
        }
      : {
          relevanceScore: null,
          placementScore: null,
          qualityTier: null,
          qualityReason: null,
          placementReason: null,
          combinedScore: entry.score,
        };
    await prisma.scanItem.update({ where: { id: entry.id }, data });
  }
  return [];
}
2026-06-21 16:28:26 +00:00
/**
 * Feeds authors of a scan's best posts back into the topic's research map as
 * similar accounts.
 *
 * Takes the non-excluded items ordered by `combinedScore`, keeps the top 30%
 * (at least 3), groups them by lower-cased author name, then:
 *   - for authors already in `similarAccounts`: upgrades a missing/"low"
 *     confidence and advances `lastActiveAt` when a newer post was seen;
 *   - for unknown authors: adds a new entry with source "scan".
 * The research map is written back when anything changed. Does nothing when
 * the scan has no visible items, the topic is missing, or it has no research map.
 *
 * @param scanId - Scan whose items are inspected.
 * @param topicId - Topic whose research map is updated.
 */
async function enrichAccountsFromScan(scanId: string, topicId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: {
      items: {
        where: { OR: [{ qualityTier: null }, { qualityTier: { not: "EXCLUDE" } }] },
        orderBy: { combinedScore: "desc" },
      },
    },
  });
  if (!scan || scan.items.length === 0) return;
  const cutoff = Math.max(3, Math.ceil(scan.items.length * 0.3));
  const topItems = scan.items.slice(0, cutoff);
  // latestPost stays null until a dated post is seen, so an undated author is
  // never recorded as "last active" at the Unix epoch.
  const authorMap = new Map<
    string,
    { count: number; maxScore: number; latestPost: Date | null; reason: string }
  >();
  for (const item of topItems) {
    if (!item.authorName) continue;
    const key = item.authorName.toLowerCase();
    const itemScore = item.combinedScore ?? item.score;
    const existing = authorMap.get(key);
    if (existing) {
      existing.count++;
      if (itemScore > existing.maxScore) {
        existing.maxScore = itemScore;
      }
      if (item.postedAt && (!existing.latestPost || item.postedAt > existing.latestPost)) {
        existing.latestPost = item.postedAt;
      }
    } else {
      authorMap.set(key, {
        count: 1,
        maxScore: itemScore,
        latestPost: item.postedAt ?? null,
        reason: item.qualityReason || item.text.slice(0, 80) || "海巡發現的高品質作者",
      });
    }
  }
  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) return;
  const existingMap = parseResearchMap(topic.researchMap);
  if (!existingMap) return;
  const existingAccounts = existingMap.similarAccounts ?? [];
  const existingByKey = new Map(existingAccounts.map((a) => [a.username.toLowerCase(), a]));
  const newAccounts: SimilarAccount[] = [];
  // Tracks in-place upgrades of known accounts so they are persisted even when
  // the scan surfaced no new author.
  let existingChanged = false;
  for (const [key, data] of authorMap) {
    const existing = existingByKey.get(key);
    if (existing) {
      if (!existing.confidence || existing.confidence === "low") {
        existing.confidence = data.count >= 2 ? "high" : "medium";
        existingChanged = true;
      }
      if (data.latestPost && data.latestPost > new Date(existing.lastActiveAt ?? 0)) {
        existing.lastActiveAt = data.latestPost.toISOString();
        existingChanged = true;
      }
    } else {
      newAccounts.push({
        username: key,
        reason: data.reason,
        source: "scan",
        profileUrl: threadsProfileUrl(key) ?? undefined,
        confidence: data.count >= 2 ? "high" : "medium",
        lastActiveAt: data.latestPost?.toISOString(),
      });
    }
  }
  if (newAccounts.length === 0 && !existingChanged) return;
  // New accounts go first, followed by the (possibly upgraded) known ones.
  const merged = [...newAccounts, ...existingByKey.values()];
  await prisma.topic.update({
    where: { id: topicId },
    data: { researchMap: JSON.stringify({ ...existingMap, similarAccounts: merged }) },
  });
}
2026-06-21 12:50:31 +00:00
/**
 * Scans every active topic sequentially, pausing via `humanDelay(2000, 4000)`
 * after each scan.
 *
 * @param accountId - When truthy, only topics of this account are scanned.
 * @returns The scan results in the order the topics were returned by the query.
 *          An error thrown by any scan propagates and stops the loop.
 */
export async function runScanForAllActiveTopics(accountId?: string | null) {
  const accountFilter = accountId ? { accountId } : {};
  const activeTopics = await prisma.topic.findMany({
    where: { active: true, ...accountFilter },
  });

  const results = [];
  for (const { id } of activeTopics) {
    results.push(await runScanForTopic(id));
    await humanDelay(2000, 4000);
  }
  return results;
}