haixunMaster/lib/services/scan-tasks.ts

273 lines
8.3 KiB
TypeScript
Raw Normal View History

2026-06-21 12:50:31 +00:00
import { isOnTopicTag } from "@/lib/topic-anchor";
import { isPlacementGoal } from "@/lib/types/topic-goal";
import {
isAccountTag,
normalizeUsername,
type ResearchMap,
} from "@/lib/types/research";
/** The two kinds of scan work this module produces: a keyword search or an account scan. */
export type ScanTaskKind = "keyword" | "account";
/** A single scan job as produced by the task builders in this module. */
export interface ScanTask {
/** Unique key; `addTask` derives it as `${kind}:${query}`. */
id: string;
/** Whether `query` is a search keyword or an account username. */
kind: ScanTaskKind;
/** Keyword text, or the normalized username for account tasks. */
query: string;
/** Text paired with the query: the tag itself for keywords, `@username` for accounts (presumably for display). */
label: string;
/** Per-task numeric limit (builders here use 12-16); presumably the max items to collect — confirm with the consumer. */
limit: number;
}
import { getBrowserConcurrency } from "@/lib/threads-browser/human-behavior";
/** Hard upper bound applied by `resolveScanConcurrency`, whatever `getBrowserConcurrency()` reports. */
const MAX_CONCURRENCY = 2;
/**
 * Partitions tasks by kind, preserving the input order inside each group.
 *
 * @param tasks Tasks to partition; the array itself is not modified.
 * @returns Two new arrays: the keyword tasks and the account tasks.
 */
export function splitScanTasks(tasks: ScanTask[]): {
  keywordTasks: ScanTask[];
  accountTasks: ScanTask[];
} {
  const keywordTasks: ScanTask[] = [];
  const accountTasks: ScanTask[] = [];
  tasks.forEach((task) => {
    switch (task.kind) {
      case "keyword":
        keywordTasks.push(task);
        break;
      case "account":
        accountTasks.push(task);
        break;
    }
  });
  return { keywordTasks, accountTasks };
}
/**
 * Usernames of the @account tags the user explicitly selected.
 * Similar accounts from the research map are never added automatically.
 *
 * @param selectedTags Tags currently selected by the user.
 * @returns Normalized usernames, with empty results dropped.
 */
export function getSelectedAccountUsernames(selectedTags: string[]): string[] {
  const accountTags = selectedTags.filter(isAccountTag);
  const usernames = accountTags.map((accountTag) => normalizeUsername(accountTag));
  // Discard tags whose normalized username came out empty.
  return usernames.filter(Boolean);
}
/** True when at least one selected tag resolves to a non-empty account username. */
export function hasSelectedAccountTags(selectedTags: string[]): boolean {
  const usernames = getSelectedAccountUsernames(selectedTags);
  return usernames.length !== 0;
}
/**
 * Picks how many scan tasks may run at once.
 *
 * Parallelism is deliberately bounded to lower the risk of Meta flagging the
 * traffic as a bot. The original note also says the default is single-threaded,
 * presumably via `getBrowserConcurrency()` — confirm there.
 *
 * @param taskCount Number of tasks waiting to run.
 * @returns `taskCount` raised to at least 1, then capped by both
 *   `MAX_CONCURRENCY` and `getBrowserConcurrency()`.
 */
export function resolveScanConcurrency(taskCount: number): number {
  const wanted = Math.max(1, taskCount);
  const ceiling = Math.min(getBrowserConcurrency(), MAX_CONCURRENCY);
  return Math.min(ceiling, wanted);
}
/**
 * Appends `task` to `tasks` unless a task with the same kind and query was
 * already added. The `kind:query` key doubles as the task id.
 *
 * @param tasks Output list, mutated in place.
 * @param seen Keys already added, mutated in place.
 * @param task Task fields without an id.
 */
function addTask(
  tasks: ScanTask[],
  seen: Set<string>,
  task: Omit<ScanTask, "id">
) {
  const id = `${task.kind}:${task.query}`;
  if (!seen.has(id)) {
    seen.add(id);
    tasks.push({ ...task, id });
  }
}
/** Question / help-seeking openers removed from the very start of a phrase. */
const PLACEMENT_QUERY_PREFIX_RE = /^(請問|想問|有人|大家|想知道|想請教|求助|請益)/;
/** Maximum number of queries returned by `resolvePlacementSearchQueries`. */
const PLACEMENT_QUERY_MAX = 8;
/**
 * Punctuation stripped from phrases: the ASCII and CJK marks handled before,
 * plus their full-width forms (U+FF1F ? U+FF01 ! U+FF0C , U+FF1B ; U+FF1A :
 * U+FF08/U+FF09 parentheses) and curly quotes (U+201C/U+201D/U+2018/U+2019).
 * The additions are written as escapes so they survive re-encoding.
 */
const PLACEMENT_PUNCTUATION_RE =
  /[?!。.、,;:"'「」【】()\uFF1F\uFF01\uFF0C\uFF1B\uFF1A\uFF08\uFF09\u201C\u201D\u2018\u2019]/g;
/** Longest phrase kept, in UTF-16 code units (matches the upper bound in `placementPhraseScore`). */
const PLACEMENT_PHRASE_MAX_LENGTH = 28;

/**
 * Turns a research-map question or pillar into a keyword that can be searched
 * on Threads.
 *
 * Steps: remove punctuation, remove all whitespace, remove one leading
 * question opener (e.g. "請問", "有人"), then truncate.
 *
 * @param text Raw question, pillar, tag, or seed query.
 * @returns The compacted phrase, at most 28 code units long; may be empty.
 */
export function normalizePlacementSearchPhrase(text: string): string {
  return (
    text
      .replace(PLACEMENT_PUNCTUATION_RE, "")
      // Whitespace must go before the opener is stripped: the opener regex is
      // anchored at the start, so leading or embedded spaces would otherwise
      // shield it from removal.
      .replace(/\s+/g, "")
      .replace(PLACEMENT_QUERY_PREFIX_RE, "")
      .slice(0, PLACEMENT_PHRASE_MAX_LENGTH)
  );
}
/**
 * Ranks a normalized phrase as a placement search query.
 *
 * @param phrase Candidate phrase, already normalized.
 * @param seedQuery The topic's seed query; an empty string disables the overlap bonus.
 * @returns -1 when the phrase is shorter than 3 or longer than 28 characters.
 *   Otherwise its length, plus 6 if it contains or is contained in the seed
 *   query, plus 4 if it contains a need/help-seeking word.
 */
function placementPhraseScore(phrase: string, seedQuery: string): number {
  const length = phrase.length;
  const tooShort = length < 3;
  const tooLong = length > 28;
  if (tooShort || tooLong) return -1;

  const overlapsSeed =
    Boolean(seedQuery) && (seedQuery.includes(phrase) || phrase.includes(seedQuery));
  const seedBonus = overlapsSeed ? 6 : 0;

  const soundsLikeNeed = /推薦|請益|求助|請問|怎麼|用什麼|哪款|困擾|煩惱|怕|癢|過敏/.test(phrase);
  const needBonus = soundsLikeNeed ? 4 : 0;

  return length + seedBonus + needBonus;
}
/**
 * Placement mode: builds the keyword queries to search, best first, from the
 * audience questions and content pillars of the research map (no manual tag
 * selection required).
 *
 * Candidate sources and their score bonus on top of `placementPhraseScore`:
 * - preferred tags (+30): the selected tags if any, otherwise the map's
 *   suggested tags;
 * - research-map questions and pillars (+0);
 * - the seed query itself (+2).
 *
 * Account tags are never used as keywords. The suggested-tag path already
 * excluded them; explicitly selected tags are now filtered the same way, so a
 * selected "@user" is not searched as a keyword.
 *
 * @param researchMap Research map, or null/undefined when none exists yet.
 * @param seedQuery The topic's seed query.
 * @param selectedTags Tags selected by the user; defaults to none.
 * @returns Up to `PLACEMENT_QUERY_MAX` normalized phrases, highest score
 *   first, de-duplicated case-insensitively. Without a research map, just the
 *   normalized seed query (if at least 3 characters long).
 */
export function resolvePlacementSearchQueries(
  researchMap: ResearchMap | null | undefined,
  seedQuery: string,
  selectedTags: string[] = []
): string[] {
  const seed = normalizePlacementSearchPhrase(seedQuery);
  if (!researchMap) {
    return seed.length >= 3 ? [seed] : [];
  }

  const suggestedTags = researchMap.suggestedTags ?? [];
  // Suggested tags typed as accounts may lack the "@" prefix, so remember
  // them by name as well as relying on isAccountTag.
  const accountTagNames = new Set(
    suggestedTags.filter((tag) => tag.searchType === "帳號").map((tag) => tag.tag)
  );
  const preferredTags = selectedTags.length > 0
    ? selectedTags.filter((tag) => !isAccountTag(tag) && !accountTagNames.has(tag))
    : suggestedTags
        .filter((tag) => tag.searchType !== "帳號" && !isAccountTag(tag.tag))
        .map((tag) => tag.tag);

  const candidates: Array<{ phrase: string; score: number }> = [];
  // Normalizes and scores one source text; unusable phrases (score < 0) are skipped.
  const consider = (text: string, bonus: number): void => {
    const phrase = normalizePlacementSearchPhrase(text);
    const score = placementPhraseScore(phrase, seedQuery);
    if (score >= 0) candidates.push({ phrase, score: score + bonus });
  };

  for (const tag of preferredTags) consider(tag, 30);
  for (const item of [...(researchMap.questions ?? []), ...(researchMap.pillars ?? [])]) {
    consider(item, 0);
  }
  if (seed.length >= 3) {
    candidates.push({ phrase: seed, score: placementPhraseScore(seed, seedQuery) + 2 });
  }

  // Sort before de-duplicating so the highest-scoring copy of a phrase wins.
  candidates.sort((a, b) => b.score - a.score);
  const seen = new Set<string>();
  const queries: string[] = [];
  for (const { phrase } of candidates) {
    if (queries.length >= PLACEMENT_QUERY_MAX) break;
    const key = phrase.toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);
    queries.push(phrase);
  }
  return queries;
}
/**
 * Reports whether placement mode would have anything to search for.
 *
 * @param researchMap Research map, or null/undefined when none exists yet.
 * @param seedQuery The topic's seed query; defaults to an empty string.
 * @returns True when `resolvePlacementSearchQueries` (called without selected
 *   tags) yields at least one query.
 */
export function hasPlacementSearchSources(
  researchMap: ResearchMap | null | undefined,
  seedQuery = ""
): boolean {
  const queries = resolvePlacementSearchQueries(researchMap, seedQuery);
  return queries.length !== 0;
}
/**
 * Builds the keyword scan tasks for placement mode.
 *
 * Every query from `resolvePlacementSearchQueries` becomes one keyword task
 * with limit 14, labelled with the query itself.
 *
 * @param params.researchMap Research map, if one exists.
 * @param params.seedQuery The topic's seed query.
 * @param params.selectedTags Tags selected by the user, if any.
 * @returns Keyword tasks in query order, without duplicates.
 */
export function buildPlacementScanTasks(params: {
  researchMap?: ResearchMap | null;
  seedQuery: string;
  selectedTags?: string[];
}): ScanTask[] {
  const { researchMap, seedQuery, selectedTags } = params;
  const collected: ScanTask[] = [];
  const addedKeys = new Set<string>();
  resolvePlacementSearchQueries(researchMap, seedQuery, selectedTags).forEach((phrase) => {
    addTask(collected, addedKeys, {
      kind: "keyword",
      query: phrase,
      label: phrase,
      limit: 14,
    });
  });
  return collected;
}
/**
 * Turns the user's selected tags into scan tasks.
 *
 * Account tags become account tasks (limit 15), but only outside placement
 * mode and only when the research map vouches for the account: either as a
 * suggested tag of type "帳號" or as one of its similar accounts. Matching is
 * done on the normalized, lower-cased username, so "@name" matches a
 * suggested tag stored as "name".
 *
 * Every other tag becomes a keyword task. Tags absent from the research map
 * must pass the strict topic anchor (`isOnTopicTag`) or they are dropped.
 *
 * @param params.selectedTags Tags selected by the user.
 * @param params.researchMap Research map, if one exists.
 * @param params.seedQuery The topic's seed query.
 * @param params.topicGoal Topic goal; placement mode is decided by `isPlacementGoal`.
 * @param params.topicLabel Topic label for the anchor check; defaults to `seedQuery`.
 * @returns Tasks in selection order, de-duplicated by kind and query.
 */
export function buildScanTasks(params: {
  selectedTags: string[];
  researchMap?: ResearchMap | null;
  seedQuery: string;
  topicGoal?: string | null;
  topicLabel?: string;
}): ScanTask[] {
  const { selectedTags, researchMap, seedQuery, topicGoal, topicLabel = seedQuery } = params;
  const placementMode = isPlacementGoal(topicGoal);
  const suggestedTags = researchMap?.suggestedTags ?? [];
  const tagMeta = new Map(
    (researchMap?.suggestedTags ?? []).map((t) => [t.tag, t])
  );
  const tasks: ScanTask[] = [];
  const seen = new Set<string>();

  for (const tag of selectedTags) {
    const meta = tagMeta.get(tag);
    const isAccount = isAccountTag(tag) || meta?.searchType === "帳號";

    if (isAccount) {
      // Placement mode never scans account pages.
      if (placementMode) continue;
      const username = normalizeUsername(tag);
      // A tag that normalizes to nothing (e.g. a bare "@") cannot be scanned.
      if (!username) continue;
      const wanted = username.toLowerCase();
      const isApprovedTopicAccount =
        meta?.searchType === "帳號" ||
        // Also match by username: the selected tag may carry an "@" prefix
        // that the suggested tag lacks, so the exact-string lookup above misses it.
        suggestedTags.some(
          (t) => t.searchType === "帳號" && normalizeUsername(t.tag).toLowerCase() === wanted
        ) ||
        (researchMap?.similarAccounts ?? []).some(
          (account) => normalizeUsername(account.username).toLowerCase() === wanted
        );
      // Manually entered accounts that the research map has not confirmed as
      // same-domain are not crawled as whole pages.
      if (!isApprovedTopicAccount) continue;
      addTask(tasks, seen, {
        kind: "account",
        query: username,
        label: `@${username}`,
        limit: 15,
      });
      continue;
    }

    // Tags from the research map were already validated by the dedicated AI
    // step and natural search terms, so they need not literally contain the
    // seed query. Only manual tags outside the map go through the strict
    // topic anchor, to keep the scan on topic.
    if (!meta && !isOnTopicTag(tag, { label: topicLabel, query: seedQuery })) {
      continue;
    }

    const isShort = meta?.searchType === "短詞" || tag.length <= 4;
    const isNeedTag =
      meta?.searchIntent === "需求" ||
      meta?.searchIntent === "求助" ||
      meta?.searchIntent === "痛點";
    let limit: number;
    if (placementMode) {
      limit = isNeedTag ? 16 : isShort ? 14 : 12;
    } else {
      limit = isShort ? 15 : 12;
    }
    addTask(tasks, seen, {
      kind: "keyword",
      query: tag,
      label: tag,
      limit,
    });
  }
  return tasks;
}
/**
 * Chooses the tags that are pre-selected for a freshly generated research map.
 *
 * Placement mode (target 10): up to 6 need/help/pain-point tags, 5 scene
 * tags, 4 short terms and 1 quote. Otherwise (target 8): up to 4 short terms,
 * 2 scene tags, 1 quote and 1 account tag. Duplicates are removed; if the mix
 * is still below target it is topped up, first with the remaining non-account
 * tags in map order, then with up to 2 account tags.
 *
 * Account tags are returned with a leading "@".
 *
 * @param researchMap Research map supplying the suggested tags; a missing
 *   `suggestedTags` list is treated as empty.
 * @param topicGoal Topic goal; placement mode is decided by `isPlacementGoal`,
 *   the same helper `buildScanTasks` uses.
 * @returns At most `target` distinct tags, in pick order.
 */
export function pickDefaultSelectedTags(
  researchMap: ResearchMap,
  topicGoal?: string | null
): string[] {
  // Guard like the other readers of suggestedTags in this module do.
  const tags = researchMap.suggestedTags ?? [];
  const shorts = tags.filter((t) => t.searchType === "短詞").map((t) => t.tag);
  const quotes = tags.filter((t) => t.searchType === "語錄").map((t) => t.tag);
  const scenes = tags.filter((t) => t.searchType === "情境").map((t) => t.tag);
  const needTags = tags
    .filter((t) => t.searchIntent === "需求" || t.searchIntent === "求助" || t.searchIntent === "痛點")
    .map((t) => t.tag);
  const acctTags = tags
    .filter((t) => t.searchType === "帳號")
    .map((t) => (t.tag.startsWith("@") ? t.tag : `@${t.tag}`));

  const isPlacement = isPlacementGoal(topicGoal);
  const picked = isPlacement
    ? [
        ...needTags.slice(0, 6),
        ...scenes.slice(0, 5),
        ...shorts.slice(0, 4),
        ...quotes.slice(0, 1),
      ]
    : [
        ...shorts.slice(0, 4),
        ...scenes.slice(0, 2),
        ...quotes.slice(0, 1),
        ...acctTags.slice(0, 1),
      ];
  const target = isPlacement ? 10 : 8;

  // A Set keeps insertion order, so it de-duplicates without reordering.
  const chosen = new Set(picked);
  const fallbackOrder = [
    ...tags.filter((t) => t.searchType !== "帳號" && !isAccountTag(t.tag)).map((t) => t.tag),
    ...acctTags.slice(0, 2),
  ];
  for (const tag of fallbackOrder) {
    if (chosen.size >= target) break;
    chosen.add(tag);
  }
  return [...chosen].slice(0, target);
}