1001 lines
36 KiB
TypeScript
1001 lines
36 KiB
TypeScript
import { prisma } from "@/lib/db";
|
||
import { getActiveAccountConnectionSettings } from "@/lib/account-connection-settings";
|
||
|
||
|
||
import { isPlacementGoal } from "@/lib/types/topic-goal";
|
||
import { assertJobNotCancelled, isJobCancelled } from "@/lib/jobs/cancel";
|
||
import { initTaskProgress, setTaskStatus } from "@/lib/jobs/progress";
|
||
import { BROWSER_STEP_LABELS, type BrowserCrawlStep } from "@/lib/threads-browser/progress";
|
||
import { updateJobProgress } from "@/lib/jobs/progress-server";
|
||
import type { JobProgressDetail } from "@/lib/jobs/types";
|
||
import { ensureActiveSession } from "@/lib/threads-browser";
|
||
import { executeScanTasks, search } from "@/lib/threads-browser/search";
|
||
import { getReplyFetchConcurrency } from "@/lib/threads-browser/human-behavior";
|
||
import { getRepliesParallel } from "@/lib/threads-browser/replies";
|
||
import { keywordSearchViaThreadsApi } from "@/lib/threads-api";
|
||
import { getActiveThreadsCredentials } from "@/lib/services/threads-credentials";
|
||
import { computePlacementScore, type RankedPost } from "@/lib/ranking";
|
||
import { parseResearchMap, parseSelectedTags, threadsProfileUrl, type SimilarAccount } from "@/lib/types/research";
|
||
import { humanDelay } from "@/lib/utils";
|
||
import { runWithConcurrency } from "@/lib/utils/concurrency";
|
||
import {
|
||
contentBandFromResearchMap,
|
||
isInContentBand,
|
||
} from "@/lib/research-content-band";
|
||
import { hasPlacementIntent, looksLikeCasualChat } from "@/lib/topic-anchor";
|
||
import {
|
||
isPostFreshEnough,
|
||
PLACEMENT_MAX_POST_AGE_DAYS,
|
||
PLACEMENT_WEB_SEARCH_MAX_QUERIES,
|
||
PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS,
|
||
PLACEMENT_WEB_SEARCH_TARGET_POSTS,
|
||
} from "@/lib/scan-recency";
|
||
import { isBraveSearchConfigured } from "@/lib/services/web-search";
|
||
import {
|
||
modeAllowsBrave,
|
||
modeAllowsCrawler,
|
||
modeAllowsThreads,
|
||
searchSourceModeLabel,
|
||
} from "@/lib/search/source-mode";
|
||
import {
|
||
buildPlacementScanTasks,
|
||
buildScanTasks,
|
||
getSelectedAccountUsernames,
|
||
hasPlacementSearchSources,
|
||
hasSelectedAccountTags,
|
||
resolvePlacementSearchQueries,
|
||
resolveScanConcurrency,
|
||
splitScanTasks,
|
||
} from "./scan-tasks";
|
||
import {
|
||
discoverPostsFromSimilarAccounts,
|
||
discoverPostsViaWebSearch,
|
||
mergeScanPosts,
|
||
tagPostSource,
|
||
} from "./scan-web-discover";
|
||
|
||
/** Number of top-scored posts whose replies are fetched after a scan is saved. */
const REPLY_FETCH_TOP_N = 4;

/** Upper bound passed to `mergeScanPosts` whenever result sets are merged. */
const MAX_MERGED_POSTS = 90;

/** A ranked post optionally annotated with the search tag/label that produced it. */
type RankedPostWithTag = RankedPost & { searchTag?: string };
|
||
|
||
/**
 * Re-scores posts with `computePlacementScore` when the scan runs in placement mode.
 *
 * @param posts - Posts to (possibly) re-score.
 * @param placementMode - When false the input array is returned as-is (same reference).
 * @returns Shallow copies with a replaced `score` in placement mode, otherwise `posts`.
 */
function rescoreForPlacement(posts: RankedPostWithTag[], placementMode: boolean): RankedPostWithTag[] {
  return placementMode
    ? posts.map((entry) => ({ ...entry, score: computePlacementScore(entry) }))
    : posts;
}
|
||
|
||
/**
 * Applies the placement-mode filters in a fixed order and reports how many
 * posts each stage removed.
 *
 * Stages: freshness (`isPostFreshEnough`), then intent (`hasPlacementIntent`
 * and not `looksLikeCasualChat`), then — only when a content band is given —
 * `isInContentBand`.
 *
 * @param posts - Candidate posts.
 * @param contentBand - Content band derived from the research map; a falsy value skips the band stage.
 * @returns The surviving posts plus per-stage removal counts.
 */
function applyPlacementPostFilters(
  posts: RankedPostWithTag[],
  contentBand: ReturnType<typeof contentBandFromResearchMap>
): { posts: RankedPostWithTag[]; removedStale: number; removedCasual: number; removedBand: number } {
  const fresh = posts.filter((candidate) => isPostFreshEnough(candidate.postedAt));

  const withIntent = fresh.filter(
    (candidate) => hasPlacementIntent(candidate.text) && !looksLikeCasualChat(candidate.text)
  );

  const inBand = contentBand
    ? withIntent.filter((candidate) => isInContentBand(candidate.text, contentBand))
    : withIntent;

  return {
    posts: inBand,
    removedStale: posts.length - fresh.length,
    removedCasual: fresh.length - withIntent.length,
    removedBand: withIntent.length - inBand.length,
  };
}
|
||
|
||
/**
 * Picks the research-map similar accounts that the user selected via account tags.
 *
 * @param researchMap - Parsed research map (may be nullish).
 * @param selectedTags - Tags selected for this scan.
 * @returns Map keyed by lower-cased, trimmed username; empty when no account tag is selected.
 */
function collectWebAccountTargets(
  researchMap: ReturnType<typeof parseResearchMap>,
  selectedTags: string[]
): Map<string, { username: string; reason: string; postUrl?: string }> {
  type WebAccountTarget = { username: string; reason: string; postUrl?: string };

  if (!hasSelectedAccountTags(selectedTags)) {
    return new Map<string, WebAccountTarget>();
  }

  const wanted = new Set(
    getSelectedAccountUsernames(selectedTags).map((name) => name.toLowerCase())
  );

  const entries: Array<[string, WebAccountTarget]> = [];
  for (const candidate of researchMap?.similarAccounts ?? []) {
    const key = candidate.username.trim().toLowerCase();
    // An empty key means the username was blank after trimming.
    if (key && wanted.has(key)) {
      entries.push([key, candidate]);
    }
  }

  // Map construction applies entries in order, so a later duplicate replaces an earlier one.
  return new Map<string, WebAccountTarget>(entries);
}
|
||
|
||
/**
 * Derives the `searchSource` label stored on a scan.
 *
 * @param primaryCount - Number of posts attributed to the primary (API/browser) sources.
 * @param webCount - Number of posts found through web search.
 * @param usedApi - Whether the official API contributed posts.
 * @param usedBrowser - Whether the browser crawler contributed posts.
 * @returns `"web"`, `"hybrid"`, `"api"` or `"browser"`.
 */
function resolveSearchSource(
  primaryCount: number,
  webCount: number,
  usedApi: boolean,
  usedBrowser: boolean
): string {
  if (webCount > 0) {
    // Web results only -> "web"; web results on top of primary results -> "hybrid".
    return primaryCount === 0 ? "web" : "hybrid";
  }
  if (usedApi && usedBrowser) {
    return "hybrid";
  }
  if (usedApi) {
    return "api";
  }
  return "browser";
}
|
||
|
||
/**
 * Callback invoked with a human-readable progress message while a scan runs.
 * It may be synchronous or return a promise; `runScanForTopic` awaits the result.
 */
export type ScanProgressCallback = (message: string) => void | Promise<void>;
|
||
|
||
/**
 * Persists a progress snapshot for a job via `updateJobProgress`.
 * Does nothing when no job id is supplied.
 *
 * @param jobId - Id of the job to update, if any.
 * @param detail - Progress detail to store.
 */
async function persistProgress(jobId: string | undefined, detail: JobProgressDetail) {
  if (jobId) {
    await updateJobProgress(jobId, detail);
  }
}
|
||
|
||
/**
 * Runs one scan for a topic: collects posts from the enabled sources
 * (official Threads API, Brave web search, browser crawler), applies the
 * placement filters when the topic has a placement goal, stores the scan and
 * its items, optionally fetches replies for the top posts, runs the quality
 * filter and enriches the topic's similar accounts.
 *
 * @param topicId - Id of the topic to scan.
 * @param options - Optional settings:
 *   `useTags` (set to `false` to disable multi-tag scanning),
 *   `selectedTags` (overrides the tags stored on the topic when non-empty),
 *   `jobId` (enables cancellation checks and persisted progress),
 *   `onProgress` (receives every progress message).
 * @returns The stored scan with its topic, items and replies, as loaded by `prisma.scan.findUnique`.
 * @throws Error when the topic does not exist, when placement mode has no search
 *   sources, when no selected tag is usable, when no source is able to produce
 *   results, or when the scan ends with zero posts. Cancellation is surfaced by
 *   `assertJobNotCancelled`.
 */
export async function runScanForTopic(
  topicId: string,
  options?: {
    useTags?: boolean;
    selectedTags?: string[];
    jobId?: string;
    onProgress?: ScanProgressCallback;
  }
) {
  const jobId = options?.jobId;

  // Forwards the message to the caller and, when a job id and detail exist, persists the detail.
  const report = async (msg: string, detail?: JobProgressDetail) => {
    await options?.onProgress?.(msg);
    if (detail && jobId) {
      await persistProgress(jobId, detail);
    }
  };

  await assertJobNotCancelled(jobId);

  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) throw new Error("找不到主題");
  const topicQuery: string = topic.query;
  const placementMode = isPlacementGoal(topic.topicGoal);

  const connection = await getActiveAccountConnectionSettings();
  const sourceMode = connection.searchSourceMode;
  const allowThreads = modeAllowsThreads(sourceMode);
  const allowBrave = modeAllowsBrave(sourceMode);
  const allowCrawler = modeAllowsCrawler(sourceMode);
  const braveKeywordPriority =
    placementMode || sourceMode === "brave" || sourceMode === "brave_crawler"
      ? ("high" as const)
      : ("medium" as const);

  await report(`海巡搜尋來源:${searchSourceModeLabel(sourceMode)}`);

  // API first: when the account is connected to the official API, scan through it first;
  // fall back to the browser crawler only on error or empty results.
  const apiCredentials =
    allowThreads && connection.searchViaApi
      ? await getActiveThreadsCredentials().catch(() => null)
      : null;
  // Crawling posts/replies is only allowed while Dev mode is on; otherwise only the official API is used.
  // Note: the official API cannot read replies under other people's posts, so fetching reply
  // material still requires a browser session (i.e. Dev mode).
  const needBrowserForReplies = connection.devMode && connection.scrapeReplies;
  const researchMap = parseResearchMap(topic.researchMap);
  const selectedTags =
    options?.selectedTags && options.selectedTags.length > 0
      ? options.selectedTags
      : parseSelectedTags(topic.selectedTags);
  const useMultiTag =
    !placementMode && options?.useTags !== false && selectedTags.length > 0;

  const placementSearchQueries = placementMode
    ? resolvePlacementSearchQueries(researchMap, topicQuery, selectedTags)
    : [];

  let ranked: RankedPostWithTag[] = [];
  let scanTags: string[] = [];
  let scanMode = "single";
  let progressDetail: JobProgressDetail = { summary: "準備海巡…", phase: "tasks", tasks: [] };
  let session: Awaited<ReturnType<typeof ensureActiveSession>> | null = null;

  if (placementMode && !hasPlacementSearchSources(researchMap, topicQuery)) {
    throw new Error(
      "請先完成 AI 分析,產出「受眾會問什麼」與「內容支柱」後再海巡"
    );
  }

  const builtTasks = placementMode
    ? buildPlacementScanTasks({ researchMap, seedQuery: topicQuery, selectedTags })
    : useMultiTag
      ? buildScanTasks({
          selectedTags,
          researchMap,
          seedQuery: topic.query,
          topicGoal: topic.topicGoal,
          topicLabel: topic.label,
        })
      : [];
  const allTasks = builtTasks;
  const { keywordTasks, accountTasks } = splitScanTasks(allTasks);
  const contentBand = placementMode ? contentBandFromResearchMap(researchMap) : null;

  if (!placementMode && useMultiTag && selectedTags.length > 0 && builtTasks.length === 0) {
    throw new Error("勾選的標籤均無法用於海巡,請調整選擇後再試");
  }

  const webSearchTags = placementMode
    ? placementSearchQueries
    : useMultiTag && selectedTags.length > 0
      ? selectedTags
      : [topicQuery];
  const accountTargets = collectWebAccountTargets(researchMap, selectedTags);
  // Cumulative number of posts returned by every web-discovery pass. It is summed across
  // passes so that an empty later pass cannot hide results found by an earlier one.
  let webPostsFound = 0;

  /**
   * Runs one web-discovery pass (keyword search plus, outside placement mode,
   * similar-account search), merges the results into `ranked` and updates the "web" task.
   * Failures are reported and swallowed so that already collected posts are kept.
   *
   * @returns The number of posts this pass found (0 on failure).
   */
  async function runWebDiscover(
    taskLabel: string,
    discoverOptions?: {
      summary?: string;
      maxQueries?: number;
      targetPosts?: number;
      braveQueryBudget?: number;
      keywordPriority?: "high" | "medium" | "low";
    }
  ): Promise<number> {
    progressDetail.phase = "web";
    if (!progressDetail.tasks) progressDetail.tasks = [];
    if (!progressDetail.tasks.some((t) => t.id === "web")) {
      progressDetail.tasks.push({ id: "web", label: taskLabel, status: "running" });
    } else {
      setTaskStatus(progressDetail, "web", { status: "running", label: taskLabel });
    }
    progressDetail.summary = discoverOptions?.summary ?? taskLabel;
    await report(progressDetail.summary, progressDetail);

    // Honour the caller's priority override; fall back to the mode-derived default.
    const keywordPriority = discoverOptions?.keywordPriority ?? braveKeywordPriority;

    try {
      await assertJobNotCancelled(jobId);
      const tagMeta = new Map(
        (researchMap?.suggestedTags ?? []).map((t) => [
          t.tag,
          { searchIntent: t.searchIntent, searchType: t.searchType },
        ])
      );
      const accountWebTargets = [...accountTargets.values()];
      const [keywordWebPosts, accountWebPosts] = await Promise.all([
        discoverPostsViaWebSearch(webSearchTags, {
          perQueryLimit: 15,
          placementMode,
          concurrency: 2,
          tagMeta,
          contentBand: contentBand ?? undefined,
          maxQueries: discoverOptions?.maxQueries,
          targetPosts: discoverOptions?.targetPosts,
          braveQueryBudget: discoverOptions?.braveQueryBudget,
          keywordPriority,
          onProgress: async (msg) => {
            progressDetail.summary = msg;
            await report(msg, progressDetail);
          },
        }),
        !placementMode && accountWebTargets.length > 0
          ? discoverPostsFromSimilarAccounts(accountWebTargets.slice(0, 4), {
              perAccountLimit: 20,
              placementMode,
              keywordPriority,
            })
          : Promise.resolve([]),
      ]);
      const webPosts: Awaited<ReturnType<typeof discoverPostsViaWebSearch>> = [
        ...accountWebPosts,
        ...keywordWebPosts,
      ];
      webPostsFound += webPosts.length;
      const beforeMerge = ranked.length;
      ranked = mergeScanPosts(ranked, webPosts, MAX_MERGED_POSTS);
      const added = ranked.length - beforeMerge;
      setTaskStatus(progressDetail, "web", { status: "done", found: webPosts.length });
      const sourceDetail = placementMode
        ? `關鍵字 ${keywordWebPosts.length} 篇`
        : `相似帳號 ${accountWebPosts.length}、關鍵字 ${keywordWebPosts.length}`;
      progressDetail.summary =
        webPosts.length > 0
          ? `${taskLabel}完成:${webPosts.length} 篇(${sourceDetail}),合併後共 ${ranked.length} 篇(新增 ${added} 篇)`
          : `${taskLabel}:無額外結果,保留 ${ranked.length} 篇`;
      await report(progressDetail.summary, progressDetail);
      return webPosts.length;
    } catch (error) {
      setTaskStatus(progressDetail, "web", {
        status: "failed",
        error: error instanceof Error ? error.message : "網路搜尋失敗",
      });
      await report(
        `${taskLabel}失敗:${error instanceof Error ? error.message : "未知錯誤"},沿用既有 ${ranked.length} 篇`,
        progressDetail
      );
      return 0;
    }
  }

  /**
   * Searches through the official API. With tasks it runs them concurrently and merges
   * the results into `ranked`; without tasks it runs one search for the topic query and
   * replaces `ranked`.
   *
   * @returns Whether the API produced at least one post.
   */
  async function runApiKeywordSearch(
    credentials: NonNullable<typeof apiCredentials>,
    tasks: typeof keywordTasks
  ): Promise<boolean> {
    if (tasks.length > 0) {
      if (!placementMode) {
        scanMode = "multi-tag";
        scanTags = allTasks.map((t) => t.label);
      }
      const concurrency = resolveScanConcurrency(tasks.length);
      progressDetail = {
        summary: `API 海巡 0/${tasks.length}(${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
      const batches = await runWithConcurrency(
        tasks,
        async (task) => {
          setTaskStatus(progressDetail, task.id, { status: "running" });
          await report(`API 海巡:${task.label}`, progressDetail);
          try {
            const found = await keywordSearchViaThreadsApi(credentials, {
              query: task.query,
              limit: task.limit,
              searchType: placementMode ? "RECENT" : "TOP",
            });
            setTaskStatus(progressDetail, task.id, { status: "done", found: found.length });
            return found.map((post) =>
              tagPostSource({ ...post, searchTag: task.label }, "keyword")
            );
          } catch (error) {
            // A failing task is recorded on its progress entry and contributes no posts.
            setTaskStatus(progressDetail, task.id, {
              status: "failed",
              error: error instanceof Error ? error.message : "API 海巡失敗",
            });
            return [];
          }
        },
        {
          concurrency,
          shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
          onProgress: async (done, total) => {
            progressDetail.summary = `API 海巡 ${done}/${total}`;
            await report(progressDetail.summary, progressDetail);
          },
        }
      );
      const apiPosts = batches.flat();
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(apiPosts, placementMode),
        MAX_MERGED_POSTS
      );
      return apiPosts.length > 0;
    }

    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `API 搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [{ id: "single", label: topicQuery, status: "running" }],
    };
    await report(progressDetail.summary, progressDetail);
    const posts = await keywordSearchViaThreadsApi(credentials, {
      query: topicQuery,
      limit: 20,
      searchType: placementMode ? "RECENT" : "TOP",
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length} 篇`;
    await report(progressDetail.summary, progressDetail);
    return ranked.length > 0;
  }

  /** Records a browser-crawl step on a task, logs it and reports the updated summary. */
  async function reportBrowserStep(
    taskId: string,
    step: BrowserCrawlStep,
    detail?: string,
    summaryPrefix?: string
  ) {
    const stepLabel = BROWSER_STEP_LABELS[step] ?? step;
    setTaskStatus(progressDetail, taskId, {
      step: stepLabel,
      stepDetail: detail,
    });
    progressDetail.summary = summaryPrefix
      ? `${summaryPrefix} · ${stepLabel}${detail ? `(${detail})` : ""}`
      : `${stepLabel}${detail ? `(${detail})` : ""}`;
    console.log(`[scan-crawler] task=${taskId} ${progressDetail.summary}`);
    await report(progressDetail.summary, progressDetail);
  }

  /**
   * Executes scan tasks through the browser crawler, keeping the per-task progress
   * entries up to date, and tags each post as "account" or "keyword" based on the
   * kind of the task whose label matches the post's `searchTag`.
   */
  async function runBrowserTaskSearch(
    activeSession: NonNullable<typeof session>,
    tasks: typeof allTasks,
    label: string
  ): Promise<RankedPostWithTag[]> {
    if (tasks.length === 0) return [];

    const concurrency = resolveScanConcurrency(tasks.length);
    if (!progressDetail.tasks || progressDetail.tasks.length === 0) {
      progressDetail = {
        summary: `${label} 0/${tasks.length}(${concurrency} 路平行)`,
        phase: "tasks",
        tasks: initTaskProgress(tasks.map((t) => ({ id: t.id, label: t.label }))),
      };
      await report(progressDetail.summary, progressDetail);
    } else {
      // Keep existing progress entries and append only the tasks that are not listed yet.
      for (const task of tasks) {
        if (!progressDetail.tasks!.some((t) => t.id === task.id)) {
          progressDetail.tasks!.push({ id: task.id, label: task.label, status: "pending" });
        }
      }
    }

    const posts = await executeScanTasks(activeSession.storageState, tasks, {
      session: activeSession,
      concurrency,
      shouldAbort: jobId ? () => isJobCancelled(jobId) : undefined,
      onTaskStart: async (task) => {
        setTaskStatus(progressDetail, task.id, {
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        });
        const running = progressDetail.tasks?.filter((t) => t.status === "running").length ?? 0;
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}(${running} 進行中)· ${task.label}`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskStep: async (task, step, detail) => {
        await reportBrowserStep(
          task.id,
          step,
          detail,
          `${label} ${task.label}`
        );
      },
      onTaskDone: async (task, found) => {
        setTaskStatus(progressDetail, task.id, {
          status: "done",
          found,
          step: undefined,
          stepDetail: undefined,
        });
        const done = progressDetail.tasks?.filter((t) => t.status === "done").length ?? 0;
        progressDetail.summary = `${label} ${done}/${tasks.length}:${task.label}(${found} 篇)`;
        await report(progressDetail.summary, progressDetail);
      },
      onTaskFail: async (task, error) => {
        setTaskStatus(progressDetail, task.id, {
          status: "failed",
          error: error instanceof Error ? error.message : "失敗",
          step: undefined,
        });
        await report(progressDetail.summary, progressDetail);
      },
      onProgress: async (done, total, taskLabel) => {
        progressDetail.summary = `${label} ${done}/${total}:${taskLabel}`;
        await report(progressDetail.summary, progressDetail);
      },
    });

    return posts.map((post) =>
      tagPostSource(
        post,
        tasks.find((t) => t.label === post.searchTag)?.kind === "account" ? "account" : "keyword"
      )
    );
  }

  let usedApi = false;
  let usedBrowser = false;

  // Brave-only source modes: web search is the first source.
  if (allowBrave && !allowThreads && isBraveSearchConfigured()) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `以 Brave Search 海巡(${searchSourceModeLabel(sourceMode)})…`,
      maxQueries: placementMode ? PLACEMENT_WEB_SEARCH_MAX_QUERIES : 12,
      targetPosts: placementMode ? PLACEMENT_WEB_SEARCH_TARGET_POSTS : 25,
      keywordPriority: braveKeywordPriority,
    });
  }

  if (apiCredentials) {
    try {
      if (keywordTasks.length > 0) {
        usedApi = await runApiKeywordSearch(apiCredentials, keywordTasks);
      } else if (!placementMode) {
        usedApi = await runApiKeywordSearch(apiCredentials, []);
      }
      if (!usedApi && keywordTasks.length > 0) {
        await report(
          isBraveSearchConfigured()
            ? "官方 API 關鍵字海巡沒有結果,改用 Brave Search…"
            : "官方 API 關鍵字海巡沒有結果,改用瀏覽器…"
        );
      }
    } catch (error) {
      usedApi = false;
      await report(
        `官方 API 海巡失敗:${error instanceof Error ? error.message : "未知錯誤"}`
      );
    }
  }

  const supplementThreshold = placementMode ? PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS : 8;
  if (
    allowBrave &&
    allowThreads &&
    placementMode &&
    keywordTasks.length > 0 &&
    ranked.length < supplementThreshold &&
    isBraveSearchConfigured()
  ) {
    await runWebDiscover("Brave 網路搜尋", {
      summary: `Threads API 結果不足(${ranked.length} 篇),以 Brave Search 補充…`,
      maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
      targetPosts: PLACEMENT_WEB_SEARCH_TARGET_POSTS,
      keywordPriority: "high",
    });
  }

  const needBrowserKeywords =
    allowCrawler && keywordTasks.length > 0 && ranked.length < supplementThreshold;
  const needBrowserAccounts =
    allowCrawler && !placementMode && accountTasks.length > 0 && connection.devMode;

  if ((needBrowserKeywords || needBrowserAccounts) && allowCrawler && connection.devMode) {
    session = await ensureActiveSession();
    if (placementMode) {
      scanMode = "placement-auto";
      scanTags = keywordTasks.map((t) => t.label);
    } else if (useMultiTag) {
      scanMode = "multi-tag";
      scanTags = allTasks.map((t) => t.label);
    }

    if (needBrowserKeywords) {
      const keywordPosts = await runBrowserTaskSearch(session, keywordTasks, "關鍵字海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(keywordPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = keywordPosts.length > 0 || usedBrowser;
    }

    if (needBrowserAccounts) {
      const accountPosts = await runBrowserTaskSearch(session, accountTasks, "帳號海巡");
      ranked = mergeScanPosts(
        ranked,
        rescoreForPlacement(accountPosts, placementMode),
        MAX_MERGED_POSTS
      );
      usedBrowser = accountPosts.length > 0 || usedBrowser;
    }
  } else if (needBrowserKeywords) {
    // The crawler would be needed to top up keyword results but cannot run here:
    // explain which setting blocks it.
    if (sourceMode === "threads" || sourceMode === "threads_brave") {
      throw new Error(
        `海巡來源為「${searchSourceModeLabel(sourceMode)}」,未啟用瀏覽器補漏。請到連線設定改為混合模式或開啟爬蟲,並確認 Threads API 有足夠結果。`
      );
    }
    if (!connection.searchViaApi && !allowCrawler) {
      throw new Error(
        "未開啟任何海巡方式:請到「連線設定」選擇搜尋來源,並確認 Chrome 同步或官方 API 已就緒。"
      );
    }
    if (connection.searchViaApi && !apiCredentials) {
      throw new Error(
        "已開啟官方 API 海巡,但此帳號尚未連線官方 API。請到連線設定頁綁定 OAuth,或改為 Chrome 同步。"
      );
    }
    throw new Error(
      `目前來源「${searchSourceModeLabel(sourceMode)}」沒有可用結果。請調整連線設定或確認 threads_keyword_search 權限已通過 Meta 審核。`
    );
  } else if (!useMultiTag && !usedApi && allowCrawler && connection.devMode) {
    session = await ensureActiveSession();
    scanMode = "single";
    scanTags = [topicQuery];
    progressDetail = {
      summary: `瀏覽器搜尋「${topicQuery}」…`,
      phase: "tasks",
      tasks: [
        {
          id: "single",
          label: topicQuery,
          status: "running",
          startedAt: Date.now(),
          step: BROWSER_STEP_LABELS.session_check,
        },
      ],
    };
    await report(progressDetail.summary, progressDetail);
    const posts = await search(session.storageState, topicQuery, 20, session, {
      onStep: async (step, detail) => {
        await reportBrowserStep("single", step, detail, `瀏覽器搜尋「${topicQuery}」`);
      },
    });
    ranked = rescoreForPlacement(
      posts.map((post) => tagPostSource({ ...post, searchTag: topicQuery }, "keyword")),
      placementMode
    );
    setTaskStatus(progressDetail, "single", { status: "done", found: ranked.length });
    progressDetail.summary = `找到 ${ranked.length} 篇`;
    await report(progressDetail.summary, progressDetail);
    usedBrowser = ranked.length > 0;
  }

  if (needBrowserForReplies && !session) {
    // Posts may already have been found without a browser, but fetching replies
    // under other people's posts needs one.
    try {
      session = await ensureActiveSession();
    } catch {
      session = null;
      await report("沒有可用的瀏覽器登入,這次只用官方 API 結果,略過留言抓取。");
    }
  }

  await assertJobNotCancelled(jobId);

  if (placementMode) {
    scanMode = "placement-auto";
    scanTags = placementSearchQueries;

    const filtered = applyPlacementPostFilters(ranked, contentBand);
    ranked = filtered.posts;
    if (filtered.removedStale > 0) {
      await report(
        `排除 ${filtered.removedStale} 篇超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天的舊貼文,保留近期可置入的貼文`
      );
    }
    if (filtered.removedCasual > 0) {
      await report(
        `排除 ${filtered.removedCasual} 篇閒聊/無求助訊號的貼文,保留有痛點或求推薦的內容`
      );
    }
    if (filtered.removedBand > 0) {
      await report(
        `排除 ${filtered.removedBand} 篇不在內容區間內的貼文(須呼應受眾問題或內容支柱,且未觸及排除項)`
      );
    }

    if (
      allowBrave &&
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      isBraveSearchConfigured()
    ) {
      const need = PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS - ranked.length;
      await runWebDiscover("Brave 網路搜尋(篩選後補充)", {
        summary: `篩選後僅 ${ranked.length} 篇,再補充網搜(最多 ${PLACEMENT_WEB_SEARCH_MAX_QUERIES} 次)…`,
        maxQueries: PLACEMENT_WEB_SEARCH_MAX_QUERIES,
        targetPosts: Math.min(PLACEMENT_WEB_SEARCH_TARGET_POSTS, need + 8),
        keywordPriority: braveKeywordPriority,
      });
      // Newly merged web posts must pass the same placement filters.
      const refiltered = applyPlacementPostFilters(ranked, contentBand);
      ranked = refiltered.posts;
      if (refiltered.removedStale + refiltered.removedCasual + refiltered.removedBand > 0) {
        await report("網搜補充後已重新套用置入篩選");
      }
    } else if (
      ranked.length < PLACEMENT_WEB_SEARCH_SKIP_IF_POSTS &&
      !isBraveSearchConfigured()
    ) {
      await report(
        `篩選後僅 ${ranked.length} 篇;未設定 BRAVE_SEARCH_API_KEY,略過網搜補充`
      );
    }
  }

  if (ranked.length === 0) {
    throw new Error(
      placementMode
        ? `海巡未找到 ${PLACEMENT_MAX_POST_AGE_DAYS} 天內的適合貼文。建議:① 重新 AI 分析,加強「受眾會問什麼」與「內容支柱」② 到微調面板調整研究地圖 ③ 確認連線設定已開啟 Threads 搜尋`
        : "海巡未找到任何貼文。建議:① 重新 AI 分析取得短詞標籤 ② 勾選 @帳號 ③ 到微調面板補充相似帳號"
    );
  }

  // Posts attributable to the primary sources. `ranked` also contains merged web results
  // and is never empty here, so it only counts as primary when the API or the browser
  // actually produced posts; otherwise a web-only scan would be stored as "hybrid".
  const primaryCount = usedApi || usedBrowser ? ranked.length : 0;

  progressDetail.phase = "save";
  progressDetail.summary = `找到 ${ranked.length} 篇,寫入資料庫…`;
  if (!progressDetail.tasks) progressDetail.tasks = [];
  progressDetail.tasks.push({ id: "save", label: "寫入資料庫", status: "running" });
  await report(progressDetail.summary, progressDetail);

  const scan = await prisma.scan.create({
    data: {
      accountId: topic.accountId,
      topicId: topic.id,
      scanMode,
      scanGoal: topic.topicGoal,
      scanTags: JSON.stringify(scanTags),
      searchSource: resolveSearchSource(primaryCount, webPostsFound, usedApi, usedBrowser),
    },
  });

  for (const item of ranked) {
    await assertJobNotCancelled(jobId);
    await prisma.scanItem.create({
      data: {
        scanId: scan.id,
        externalId: item.externalId,
        text: item.text,
        permalink: item.permalink,
        authorName: item.authorName,
        postedAt: item.postedAt,
        likeCount: item.likeCount,
        replyCount: item.replyCount,
        score: item.score,
        searchTag: item.searchTag,
      },
    });
  }
  setTaskStatus(progressDetail, "save", { status: "done", found: ranked.length });

  // Fetch replies first (when a browser session exists) so they are available for review
  // and for later post/reply generation. Without a browser, `session` is null and this
  // step is skipped; the scan still completes.
  let repliesCount = 0;
  progressDetail.phase = "replies";
  progressDetail.tasks!.push({
    id: "replies",
    label: "抓取留言",
    status: "running",
    startedAt: Date.now(),
    step: "準備中",
  });

  if (needBrowserForReplies && session) {
    // Re-read the stored items to obtain their database ids for the reply rows.
    const saved = await prisma.scan.findUnique({
      where: { id: scan.id },
      include: { items: true },
    });
    const items = saved?.items ?? [];
    const replyTargets = [...items]
      .sort((a, b) => b.score - a.score)
      .filter((item) => item.permalink)
      .slice(0, REPLY_FETCH_TOP_N);

    if (replyTargets.length > 0) {
      progressDetail.summary = `抓取 Top ${replyTargets.length} 篇貼文留言…`;
      setTaskStatus(progressDetail, "replies", {
        status: "running",
        label: `抓取留言(${replyTargets.length} 篇貼文)`,
      });
      await report(progressDetail.summary, progressDetail);
      await assertJobNotCancelled(jobId);

      const permalinks = replyTargets.map((i) => i.permalink!);
      const repliesByUrl = await getRepliesParallel(
        session.storageState,
        permalinks,
        Math.min(connection.repliesPerPost, 5),
        {
          concurrency: getReplyFetchConcurrency(),
          session,
          onProgress: async (done, total, permalink) => {
            const short = permalink.split("/").slice(-2).join("/") || permalink;
            progressDetail.summary = `抓取留言 ${done}/${total}:${short}`;
            setTaskStatus(progressDetail, "replies", {
              status: "running",
              step: `讀取貼文留言 ${done}/${total}`,
              stepDetail: short,
              startedAt: progressDetail.tasks?.find((t) => t.id === "replies")?.startedAt ?? Date.now(),
            });
            await report(progressDetail.summary, progressDetail);
          },
        }
      );

      for (const item of replyTargets) {
        await assertJobNotCancelled(jobId);
        const replies = repliesByUrl.get(item.permalink!) ?? [];
        for (const reply of replies) {
          await prisma.reply.create({
            data: {
              scanItemId: item.id,
              text: reply.text,
              authorName: reply.authorName,
              likeCount: reply.likeCount,
              postedAt: reply.postedAt,
            },
          });
          repliesCount += 1;
        }
      }
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言",
        found: repliesCount,
      });
      progressDetail.summary = `已抓 ${repliesCount} 則留言`;
      await report(progressDetail.summary, progressDetail);
    } else {
      setTaskStatus(progressDetail, "replies", {
        status: "done",
        label: "抓取留言(無可抓貼文)",
        found: 0,
      });
      progressDetail.summary = "無可抓留言的貼文";
      await report(progressDetail.summary, progressDetail);
    }
  } else {
    const skipLabel = connection.scrapeReplies && !connection.devMode
      ? "抓取留言(需 Dev 模式)"
      : needBrowserForReplies && !session
        ? "抓取留言(無瀏覽器登入)"
        : "抓取留言(已關閉)";
    setTaskStatus(progressDetail, "replies", {
      status: "done",
      label: skipLabel,
      found: 0,
    });
    progressDetail.summary = skipLabel;
    await report(progressDetail.summary, progressDetail);
  }

  await prisma.scan.update({
    where: { id: scan.id },
    data: { repliesFetched: repliesCount > 0, repliesCount },
  });

  const qualityLabel = "整理結果";
  progressDetail.phase = "quality";
  progressDetail.summary = `${qualityLabel}中…`;
  progressDetail.tasks!.push({ id: "quality", label: qualityLabel, status: "running" });
  await report(progressDetail.summary, progressDetail);

  await assertJobNotCancelled(jobId);
  await applyQualityFilter(scan.id);

  const visibleCount = await prisma.scanItem.count({
    where: { scanId: scan.id, OR: [{ qualityTier: null }, { qualityTier: { not: "EXCLUDE" } }] },
  });

  setTaskStatus(progressDetail, "quality", { status: "done", found: visibleCount });

  await enrichAccountsFromScan(scan.id, topic.id);

  progressDetail.summary = `完成 · ${ranked.length} 篇 · ${repliesCount} 則留言`;
  await report(progressDetail.summary, progressDetail);

  return prisma.scan.findUnique({
    where: { id: scan.id },
    include: {
      topic: true,
      items: {
        orderBy: [{ combinedScore: "desc" }, { score: "desc" }],
        include: { replies: { orderBy: { likeCount: "desc" } } },
      },
    },
  });
}
|
||
|
||
/**
 * Resets the quality fields of every item in a scan.
 *
 * In placement mode, items that have a `postedAt` and fail `isPostFreshEnough`
 * are marked `EXCLUDE` with a combined score of 0. Every other item has its
 * quality fields cleared and its combined score set to its base score.
 *
 * @param scanId - Id of the scan to process.
 * @returns Always an empty array.
 * @throws Error when the scan does not exist.
 */
export async function applyQualityFilter(scanId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: { topic: true, items: true },
  });
  if (!scan) throw new Error("找不到海巡紀錄");

  const placementMode = isPlacementGoal(scan.scanGoal ?? scan.topic.topicGoal);
  const staleReason = `貼文已超過 ${PLACEMENT_MAX_POST_AGE_DAYS} 天,不適合留言置入`;

  for (const entry of scan.items) {
    const stale = placementMode && entry.postedAt && !isPostFreshEnough(entry.postedAt);

    const verdict = stale
      ? {
          qualityTier: "EXCLUDE",
          qualityReason: staleReason,
          placementReason: staleReason,
          combinedScore: 0,
        }
      : {
          qualityTier: null,
          qualityReason: null,
          placementReason: null,
          combinedScore: entry.score,
        };

    await prisma.scanItem.update({
      where: { id: entry.id },
      data: {
        relevanceScore: null,
        placementScore: null,
        ...verdict,
      },
    });
  }

  return [];
}
|
||
|
||
/**
 * Feeds authors of a scan's best items back into the topic's research map.
 *
 * Takes the non-excluded items ordered by combined score, keeps the top 30 %
 * (at least 3), groups them by lower-cased author name, then:
 * - upgrades `confidence` / `lastActiveAt` of similar accounts already in the map;
 * - adds unknown authors as new similar accounts with `source: "scan"`.
 *
 * The research map is written back whenever a new account was added or an existing
 * one was changed. Returns without writing when the scan has no visible items, or
 * the topic or its research map is missing.
 *
 * @param scanId - Scan whose items are inspected.
 * @param topicId - Topic whose research map is updated.
 */
async function enrichAccountsFromScan(scanId: string, topicId: string) {
  const scan = await prisma.scan.findUnique({
    where: { id: scanId },
    include: {
      items: {
        where: { OR: [{ qualityTier: null }, { qualityTier: { not: "EXCLUDE" } }] },
        orderBy: { combinedScore: "desc" },
      },
    },
  });
  if (!scan || scan.items.length === 0) return;

  const cutoff = Math.max(3, Math.ceil(scan.items.length * 0.3));
  const topItems = scan.items.slice(0, cutoff);
  // `latestPost` stays null until a post with a real timestamp is seen, so that an
  // unknown date is never stored as the 1970 epoch.
  const authorMap = new Map<
    string,
    { count: number; maxScore: number; latestPost: Date | null; reason: string }
  >();

  for (const item of topItems) {
    if (!item.authorName) continue;
    const key = item.authorName.toLowerCase();
    const itemScore = item.combinedScore ?? item.score;
    const existing = authorMap.get(key);
    if (existing) {
      existing.count++;
      if (itemScore > existing.maxScore) {
        existing.maxScore = itemScore;
      }
      if (item.postedAt && (!existing.latestPost || item.postedAt > existing.latestPost)) {
        existing.latestPost = item.postedAt;
      }
    } else {
      authorMap.set(key, {
        count: 1,
        maxScore: itemScore,
        latestPost: item.postedAt ?? null,
        reason: item.qualityReason || item.text.slice(0, 80) || "海巡發現的高品質作者",
      });
    }
  }

  const topic = await prisma.topic.findUnique({ where: { id: topicId } });
  if (!topic) return;

  const existingMap = parseResearchMap(topic.researchMap);
  if (!existingMap) return;

  const existingAccounts = existingMap.similarAccounts ?? [];
  const existingByKey = new Map(existingAccounts.map((a) => [a.username.toLowerCase(), a]));

  const newAccounts: SimilarAccount[] = [];
  // Set when an already-known account was modified, so the change is persisted
  // even when the scan found no new account.
  let existingChanged = false;

  for (const [key, data] of authorMap) {
    const existing = existingByKey.get(key);
    if (existing) {
      if (!existing.confidence || existing.confidence === "low") {
        existing.confidence = data.count >= 2 ? "high" : "medium";
        existingChanged = true;
      }
      if (data.latestPost && data.latestPost > new Date(existing.lastActiveAt ?? 0)) {
        existing.lastActiveAt = data.latestPost.toISOString();
        existingChanged = true;
      }
    } else {
      newAccounts.push({
        username: key,
        reason: data.reason,
        source: "scan",
        profileUrl: threadsProfileUrl(key) ?? undefined,
        confidence: data.count >= 2 ? "high" : "medium",
        // NOTE(review): assumes `lastActiveAt` is optional on SimilarAccount — confirm.
        ...(data.latestPost ? { lastActiveAt: data.latestPost.toISOString() } : {}),
      });
    }
  }

  if (newAccounts.length === 0 && !existingChanged) return;

  const merged = [...newAccounts, ...existingByKey.values()];

  await prisma.topic.update({
    where: { id: topicId },
    data: { researchMap: JSON.stringify({ ...existingMap, similarAccounts: merged }) },
  });
}
|
||
|
||
/**
 * Scans every active topic one after another, pausing via `humanDelay(2000, 4000)`
 * after each scan.
 *
 * @param accountId - When truthy, only topics of this account are scanned.
 * @returns The results of `runScanForTopic`, in topic order.
 * @throws Whatever `runScanForTopic` throws; a failing topic stops the remaining ones.
 */
export async function runScanForAllActiveTopics(accountId?: string | null) {
  const accountFilter = accountId ? { accountId } : {};
  const activeTopics = await prisma.topic.findMany({
    where: { active: true, ...accountFilter },
  });

  const results = [];
  for (const { id } of activeTopics) {
    results.push(await runScanForTopic(id));
    await humanDelay(2000, 4000);
  }
  return results;
}
|